def check_mk_remote_automation(site_id,
                               command,
                               args,
                               indata,
                               stdin_data=None,
                               timeout=None,
                               sync=True):
    """Run a Check_MK automation command on a remote site.

    The site configuration must contain a "secret" (the site is logged in) and
    have a truthy "replication" setting, otherwise MKGeneralException is
    raised. When ``sync`` is true, sync_changes_before_remote_automation() is
    called for the site before the command is sent.

    Returns the response of the remote "checkmk-automation" call.
    """
    site = config.site(site_id)

    # Pick the matching error message first, then raise in a single place
    problem = None
    if "secret" not in site:
        problem = _("Cannot connect to site \"%s\": The site is not logged in")
    elif not site.get("replication"):
        problem = _("Cannot connect to site \"%s\": The replication is disabled")
    if problem is not None:
        raise MKGeneralException(problem % site.get("alias", site_id))

    if sync:
        sync_changes_before_remote_automation(site_id)

    # Now do the actual remote command
    target_site = config.site(site_id)
    request_vars = [
        ("automation", command),  # The Check_MK automation command
        ("arguments", mk_repr(args)),  # The arguments for the command
        ("indata", mk_repr(indata)),  # The input data
        ("stdin_data", mk_repr(stdin_data)),  # The input data for stdin
        ("timeout", mk_repr(timeout)),  # The timeout
    ]
    return do_remote_automation(target_site, "checkmk-automation", request_vars)
def page(self):
    """Synchronize the current user's profile to the site named in the request.

    Raises MKUserError when the request carries no site or the site is not
    configured, and MKGeneralException when the site is marked as dead or the
    synchronization did not return True. In the latter case the returned value
    is recorded via _add_profile_replication_change() before raising.

    Returns a localized success message.
    """
    site_id_val = self.webapi_request().get("site")
    if not site_id_val:
        raise MKUserError(None, "The site_id is missing")
    site_id = site_id_val

    if site_id not in config.sitenames():
        raise MKUserError(None, _("The requested site does not exist"))

    site_status = cmk.gui.sites.states().get(site_id, cmk.gui.sites.SiteStatus({}))
    if site_status.get("state", "unknown") == "dead":
        raise MKGeneralException(_('The site is marked as dead. Not trying to replicate.'))

    site = config.site(site_id)
    assert config.user.id is not None

    result = self._synchronize_profile(site_id, site, config.user.id)
    if result is True:
        return _("Replication completed successfully.")

    # Anything other than True is handed on as the failure text
    assert result is not False
    _add_profile_replication_change(site_id, result)
    raise MKGeneralException(result)
def _get_diagnostics_dump_file(self, site: str, tarfile_name: str) -> bytes:
    """Fetch a diagnostics dump tarfile from the given site.

    For a remote site the "diagnostics-dump-get-file" automation is called;
    for the local site the work is delegated to the same-named helper that
    takes only the tarfile name.
    """
    if not config.site_is_local(site):
        return do_remote_automation(
            config.site(site),
            "diagnostics-dump-get-file",
            [("tarfile_name", tarfile_name)],
        )

    # NOTE(review): presumably resolves to the module-level function of the
    # same name, not to this method - confirm against the enclosing module.
    return _get_diagnostics_dump_file(tarfile_name)
def _do_check_mk_remote_automation_in_background_job(
        site_id: SiteId, automation_request: CheckmkAutomationRequest) -> Any:
    """Execute the automation in a background job on the remote site

    The job is started with a single call. Afterwards the remote site is
    polled with "checkmk-remote-automation-get-status" until the reported job
    status is no longer active; the result of that last response is returned.
    """
    site_config = config.site(site_id)
    job_id = _start_remote_automation_job(site_config, automation_request)

    auto_logger.info("Waiting for job completion")

    while True:
        status = CheckmkAutomationGetStatusResponse(*do_remote_automation(
            site_config,
            "checkmk-remote-automation-get-status",
            [("request", repr(job_id))],
        ))
        auto_logger.debug("Job status: %r", status)

        if status.job_status["is_active"]:
            continue  # still running, ask again

        job_result = status.result
        auto_logger.debug("Job is not active anymore. Return the result: %s", job_result)
        return job_result
def _synchronize_profiles_to_sites(logger, profiles_to_synchronize):
    """Push changed user profiles to all login slave sites in parallel.

    One worker per site is submitted to a thread pool. Results are collected
    for roughly 30 seconds; sites that delivered no result by then are
    reported as failed with "No response from update thread". Every result
    carrying an error text is logged and, if WATO is enabled, recorded as a
    pending change for that site. Finally a summary line is logged.

    Does nothing when ``profiles_to_synchronize`` is empty.
    """
    if not profiles_to_synchronize:
        return

    remote_sites = [(site_id, config.site(site_id)) for site_id in config.get_login_slave_sites()]

    logger.info('Credentials changed for %s. Trying to sync to %d sites' %
                (", ".join(profiles_to_synchronize.keys()), len(remote_sites)))

    states = sites.states()

    pool = ThreadPool()
    # The pool must be shut down on every exit path: job.get() re-raises
    # exceptions of the worker and add_change() may fail as well.
    try:
        jobs = [
            pool.apply_async(_sychronize_profile_worker,
                             (states, site_id, site, profiles_to_synchronize))
            for site_id, site in remote_sites
        ]

        results = []
        start_time = time.time()
        while time.time() - start_time < 30:
            # Iterate over a copy because finished jobs are removed from the list
            for job in jobs[:]:
                try:
                    results.append(job.get(timeout=0.5))
                    jobs.remove(job)
                except mp_TimeoutError:
                    pass
            if not jobs:
                break

        contacted_sites = {x[0] for x in remote_sites}
        working_sites = {result.site_id for result in results}
        for site_id in contacted_sites - working_sites:
            results.append(
                SynchronizationResult(site_id,
                                      error_text=_("No response from update thread"),
                                      failed=True))

        for result in results:
            if result.error_text:
                logger.info(' FAILED [%s]: %s' % (result.site_id, result.error_text))
                if config.wato_enabled:
                    add_change("edit-users",
                               _('Password changed (sync failed: %s)') % result.error_text,
                               add_user=False,
                               sites=[result.site_id],
                               need_restart=False)
    finally:
        pool.terminate()
        pool.join()

    num_failed = sum(1 for result in results if result.failed)
    num_disabled = sum(1 for result in results if result.disabled)
    num_succeeded = sum(1 for result in results if result.succeeded)
    logger.info(' Disabled: %d, Succeeded: %d, Failed: %d' %
                (num_disabled, num_succeeded, num_failed))
def execute(self):
    # type: () -> Iterator[ACResult]
    """Run the check for every configured site.

    Each result produced by _check_site() is tagged with the site it belongs
    to before it is yielded.
    """
    for site_id in config.sitenames():
        site_results = self._check_site(site_id, config.site(site_id))
        for site_result in site_results:
            site_result.site_id = site_id
            yield site_result
def _get_job_status(self) -> Dict:
    """Return the status of the fetch-agent-output job.

    The status is read directly when the host's site is local, otherwise it is
    requested from the remote site via "fetch-agent-output-get-status".
    """
    site_id = self._request.host.site_id()
    if config.site_is_local(site_id):
        return get_fetch_agent_job_status(self._request)

    site_spec = config.site(site_id)
    request_vars = [("request", repr(self._request.serialize()))]
    return watolib.do_remote_automation(site_spec, "fetch-agent-output-get-status",
                                        request_vars)
def _get_agent_output_file(self) -> bytes:
    """Return the fetched agent output file.

    The file is read directly when the host's site is local, otherwise it is
    requested from the remote site via "fetch-agent-output-get-file".
    """
    site_id = self._request.host.site_id()
    if config.site_is_local(site_id):
        return get_fetch_agent_output_file(self._request)

    site_spec = config.site(site_id)
    request_vars = [("request", repr(self._request.serialize()))]
    return watolib.do_remote_automation(site_spec, "fetch-agent-output-get-file", request_vars)
def _collect_sites_data(cls) -> List[ABCElement]:
    """Build one display element per configured site.

    Sites whose state is anything but "online" (including sites without a
    known state, which count as "missing") become an IconElement carrying the
    state as CSS class and tooltip. Online sites become a SiteElement with one
    Part per host category plus a Part holding the total host count.
    """
    sites.update_site_states_from_dead_sites()

    all_states = sites.states()
    state_titles = sites.site_state_titles()
    stats_by_site = cls._get_site_stats()

    elements: List[ABCElement] = []
    for site_id, _sitealias in config.sorted_sites():
        site_spec = config.site(site_id)
        reported_state: Optional[str] = all_states.get(site_id,
                                                       sites.SiteStatus({})).get("state")
        state = "missing" if reported_state is None else reported_state

        if state != "online":
            elements.append(
                IconElement(
                    title=site_spec["alias"],
                    css_class="site_%s" % state,
                    tooltip=state_titles[state],
                ))
            continue

        stats = stats_by_site[site_id]
        # (title, css class, number of hosts) for every host category
        breakdown = [
            (_("hosts are down or have critical services"), "critical",
             stats.hosts_down_or_have_critical),
            (_("hosts are unreachable or have unknown services"), "unknown",
             stats.hosts_unreachable_or_have_unknown),
            (_("hosts are up but have services in warning state"), "warning",
             stats.hosts_up_and_have_warning),
            (_("hosts are in scheduled downtime"), "downtime", stats.hosts_in_downtime),
            (_("hosts are up and have no service problems"), "ok",
             stats.hosts_up_without_problem),
        ]
        parts = [
            Part(title=title, css_class=css_class, count=count)
            for title, css_class, count in breakdown
        ]
        total = sum(count for _title, _css_class, count in breakdown)
        total_part = Part(title=_("Total number of hosts"), css_class="", count=total)

        elements.append(
            SiteElement(
                title=site_spec["alias"],
                url_add_vars={
                    "name": "site",
                    "site": site_id,
                },
                parts=parts,
                total=total_part,
                tooltip=cls._render_tooltip(site_spec["alias"], parts, total_part),
            ))

    return elements
def execute_host_label_sync(host_name: HostName, site_id: SiteId) -> None:
    """Contact the given remote site to synchronize the labels of the given host.

    The sync is requested for exactly this host (enforced host request) and
    the updated label files returned by the site are saved afterwards.
    """
    site_spec = config.site(site_id)
    site_request = SiteRequest(
        newest_host_labels=0.0,
        enforce_host=EnforcedHostRequest(site_id, host_name),
    )
    sync_result = _execute_site_sync(site_id, site_spec, site_request)
    save_updated_host_label_files(sync_result.updated_host_labels)
def _start_fetch(self) -> None:
    """Start the job on the site the host is monitored by

    A local site starts the job directly, a remote site is asked to do so via
    the "fetch-agent-output-start" automation.
    """
    site_id = self._request.host.site_id()
    if not config.site_is_local(site_id):
        site_spec = config.site(site_id)
        request_vars = [("request", repr(self._request.serialize()))]
        watolib.do_remote_automation(site_spec, "fetch-agent-output-start", request_vars)
        return

    start_fetch_agent_job(self._request)
def check_mk_remote_automation(site_id,
                               command,
                               args,
                               indata,
                               stdin_data=None,
                               timeout=None,
                               sync=True,
                               non_blocking_http=False):
    # type: (SiteId, str, Optional[Sequence[Union[str, Text]]], Any, Optional[str], Optional[int], bool, bool) -> Any
    """Run a Check_MK automation command on a remote site.

    The site configuration must contain a "secret" (the site is logged in) and
    have a truthy "replication" setting, otherwise MKGeneralException is
    raised. When ``sync`` is true, sync_changes_before_remote_automation() is
    called for the site first.

    With ``non_blocking_http`` the automation is executed through a background
    job on the remote site, otherwise in a single blocking HTTP request.
    """
    site = config.site(site_id)

    # Pick the matching error message first, then raise in a single place
    problem = None
    if "secret" not in site:
        problem = _("Cannot connect to site \"%s\": The site is not logged in")
    elif not site.get("replication"):
        problem = _("Cannot connect to site \"%s\": The replication is disabled")
    if problem is not None:
        raise MKGeneralException(problem % site.get("alias", site_id))

    if sync:
        sync_changes_before_remote_automation(site_id)

    if non_blocking_http:
        # This will start a background job process on the remote site to execute the automation
        # asynchronously. It then polls the remote site, waiting for completion of the job.
        automation_request = CheckmkAutomationRequest(command, args, indata, stdin_data, timeout)
        return _do_check_mk_remote_automation_in_background_job(site_id, automation_request)

    # Synchronous execution of the actual remote command in a single blocking HTTP request
    target_site = config.site(site_id)
    request_vars = [
        ("automation", command),  # The Check_MK automation command
        ("arguments", mk_repr(args)),  # The arguments for the command
        ("indata", mk_repr(indata)),  # The input data
        ("stdin_data", mk_repr(stdin_data)),  # The input data for stdin
        ("timeout", mk_repr(timeout)),  # The timeout
    ]
    return do_remote_automation(target_site, "checkmk-automation", request_vars)
def _choices(self):
    """Return (site id, alias) pairs of all online sites, sorted by alias.

    Unless ``self.enforce`` is set, an empty choice ("", "") is included.
    Sorting is case-insensitive.
    """
    choices = [] if self.enforce else [("", "")]
    choices.extend((sitename, config.site(sitename)["alias"])
                   for sitename, state in sites.states().items()
                   if state["state"] == "online")
    choices.sort(key=lambda entry: entry[1].lower())
    return choices
def _execute_remote_automation(self, request):
    """Execute the requested automation command on a distributed WATO site.

    ``request`` provides "site_id", "command" and "command_args". Raises
    MKUserError if the site is not configured or is not one of the WATO slave
    sites.
    """
    site_id = request["site_id"]

    if site_id not in config.sitenames():
        raise MKUserError("site_id", _("This site does not exist."))

    slave_sites = dict(config.wato_slave_sites())
    if site_id not in slave_sites:
        raise MKUserError("site_id", _("This site is not a distributed WATO site."))

    return cmk.gui.watolib.automations.do_remote_automation(
        config.site(site_id),
        request["command"],
        request["command_args"],
    )
def host_service_graph_popup_pnp(site, host_name, service_description):
    """Write the PNP4Nagios popup URL for a host/service to the response.

    The URL starts with the url_prefix of the given site. The mobile GUI uses
    a different PNP endpoint than the regular one.
    """
    pnp_host = cmk.utils.pnp_cleanup(host_name)
    pnp_svc = cmk.utils.pnp_cleanup(service_description)
    url_prefix = config.site(site)["url_prefix"]

    if html.mobile:
        path_template = "pnp4nagios/index.php?kohana_uri=/mobile/popup/%s/%s"
    else:
        path_template = "pnp4nagios/index.php/popup?host=%s&srv=%s"

    encoded_ids = (html.urlencode(pnp_host), html.urlencode(pnp_svc))
    html.write(url_prefix + path_template % encoded_ids)
def _get_check_table_from_remote(request):
    """Gathers the check table from a remote site

    Cares about pre 1.6 sites that do not support the new service-discovery-job
    API call, falling back to the previously existing try-inventory and
    inventory automation calls.
    """
    try:
        sync_changes_before_remote_automation(request.host.site_id())

        remote_result = watolib.do_remote_automation(
            config.site(request.host.site_id()),
            "service-discovery-job",
            [
                ("host_name", request.host.name()),
                ("options", json.dumps(request.options._asdict())),
            ],
        )
        return _deserialize_remote_result(remote_result)
    except watolib.MKAutomationException as e:
        # Only the "unknown command" answer of old sites triggers the fallback
        if "Invalid automation command: service-discovery-job" not in "%s" % e:
            raise

        # Compatibility for pre 1.6 remote sites.
        # TODO: Replace with helpful exception in 1.7.
        if request.options.action == DiscoveryAction.REFRESH:
            _counts, _failed_hosts = check_mk_automation(
                request.host.site_id(), "inventory",
                ["@scan", "refresh", request.host.name()])

        scan_mode = "@scan" if request.options.action == DiscoveryAction.SCAN else "@noscan"
        options = [scan_mode]
        if not request.options.ignore_errors:
            options.append("@raiseerrors")
        options.append(request.host.name())

        check_table = check_mk_automation(request.host.site_id(), "try-inventory", options)

        # The old API knows nothing about jobs or labels, so report a
        # non-active job and empty label information.
        return DiscoveryResult(
            job_status={
                "is_active": False,
                "state": JobStatusStates.INITIALIZED,
            },
            check_table=check_table,
            check_table_created=int(time.time()),
            host_labels={},
            new_labels={},
            vanished_labels={},
            changed_labels={},
        )
def _perform_tests_for_site(
        self, site_id: SiteId,
        result_queue: 'multiprocessing.Queue[Tuple[SiteId, str]]') -> None:
    """Run the configuration analysis for one site and report via the queue.

    The analysis is executed locally for the local site and through the
    "check-analyze-config" remote automation otherwise. In every case exactly
    one ``(site_id, repr(result))`` tuple is put on ``result_queue``, where
    ``result`` has "state" 0 and the analysis data on success, or "state" 1
    and an HTML formatted traceback if an exception occurred.
    """
    self._logger.debug("[%s] Starting" % site_id)
    try:
        # Would be better to clean all open fds that are not needed, but we don't
        # know the FDs of the result_queue pipe. Can we find it out somehow?

        # Reinitialize logging targets
        log.init_logging()  # NOTE: We run in a subprocess!

        if not config.site_is_local(site_id):
            results_data = watolib.do_remote_automation(
                config.site(site_id),
                "check-analyze-config",
                [],
                timeout=html.request.request_timeout - 10,
            )
        else:
            automation = AutomationCheckAnalyzeConfig()
            results_data = automation.execute(automation.get_request())

        self._logger.debug("[%s] Finished" % site_id)

        result = {"state": 0, "response": results_data}

    except Exception:
        self._logger.exception("[%s] Failed" % site_id)
        traceback_html = traceback.format_exc().replace("\n", "<br>\n")
        result = {"state": 1, "response": "Traceback:<br>%s" % (traceback_html)}
    finally:
        result_queue.put((site_id, repr(result)))
        result_queue.close()
        result_queue.join_thread()
        # NOTE(review): join() is only offered by joinable queues; presumably
        # the caller passes a JoinableQueue - confirm against the caller.
        result_queue.join()
def show(self) -> None:
    """Render the site status table with one row per configured site.

    Each row shows the site (its ID if the state is unknown, the alias if the
    site is disabled, otherwise a link to its hosts view) and a status label.
    For sites with a known state the label is a button that toggles the site
    via switch_site.py.
    """
    html.open_table(cellspacing="0", class_="sitestate")

    sites.update_site_states_from_dead_sites()

    for sitename, _sitealias in config.sorted_sites():
        site = config.site(sitename)

        state = sites.states().get(sitename, sites.SiteStatus({})).get("state")

        if state is None:
            state = "missing"
            switch = "missing"
            text = sitename
        elif state == "disabled":
            switch = "on"
            text = site["alias"]
        else:
            switch = "off"
            text = render_link(site["alias"],
                               "view.py?view_name=sitehosts&site=%s" % sitename)

        html.open_tr()

        html.open_td(class_="left")
        html.write(text)
        html.close_td()

        # The state cell is not closed explicitly; close_tr() follows directly
        html.open_td(class_="state")
        if switch != "missing":
            url = makeactionuri_contextless(
                request,
                transactions,
                [("_site_switch", "%s:%s" % (sitename, switch))],
                filename="switch_site.py",
            )
            if state == "disabled":
                button_title = _("enable this site")
            else:
                button_title = _("disable this site")
            html.status_label_button(content=state,
                                     status=state,
                                     title=button_title,
                                     onclick="cmk.sidebar.switch_site(%s)" % (json.dumps(url)))
        else:
            html.status_label(content=state, status=state, title=_("Site is missing"))
        html.close_tr()

    html.close_table()
def host_service_graph_dashlet_pnp(graph_identification):
    """Write the PNP4Nagios image URL for a dashlet graph to the response.

    The second element of ``graph_identification`` provides "site",
    "graph_index", "host_name" and "service_description". The time range is
    taken from the "timerange" request variable; the "classic" GUI theme is
    passed to PNP as "multisite".
    """
    spec = graph_identification[1]
    site = spec["site"]
    source = int(spec["graph_index"])

    pnp_host = cmk.utils.pnp_cleanup(spec["host_name"])
    pnp_svc = cmk.utils.pnp_cleanup(spec["service_description"])
    url_prefix = config.site(site)["url_prefix"]

    theme = html.get_theme()
    pnp_theme = "multisite" if theme == "classic" else theme

    query_values = (
        html.urlencode(pnp_host),
        html.urlencode(pnp_svc),
        source,
        html.request.var("timerange"),
        pnp_theme,
    )
    html.write(url_prefix +
               "pnp4nagios/index.php/image?host=%s&srv=%s&source=%d&view=%s&theme=%s" %
               query_values)
def render(self, what, row, tags, custom_vars):
    """Return the icon linking to the prediction graph of a service.

    The performance data of the row is searched for the first entry whose
    name starts with "predict_". If one is found, a tuple of icon name, title
    and URL of the prediction graph page (below the url_prefix of the row's
    site) is returned. Returns None for anything but services and for
    services without such an entry.
    """
    # TODO: At least for interfaces we have 2 predictive values. But this icon
    # only creates a link to the first one. Add multiple icons or add a navigation
    # element to the prediction page.
    if what != "service":
        return None

    for perf_entry in row[what + "_perf_data"].split():
        if not perf_entry.startswith("predict_"):
            continue

        # partition() instead of split("=") so that an entry without "=" or
        # with more than one "=" does not raise ValueError during rendering.
        varname, _sep, _value = perf_entry.partition("=")
        dsname = varname[len("predict_"):]

        url_prefix = config.site(row["site"])["url_prefix"]
        url = url_prefix + "check_mk/prediction_graph.py?" + html.urlencode_vars([
            ("host", row["host_name"]),
            ("service", row["service_description"]),
            ("dsname", dsname),
        ])
        title = _("Analyse predictive monitoring for this service")
        return 'prediction', title, url

    return None
def _call_activate_changes_automation(self):
    """Activate the pending changes on the site of this object.

    The domains needing activation are activated directly on the local site,
    or via the "activate-changes" remote automation on a remote site.

    Raises MKGeneralException when the remote site does not know the
    "activate-changes" command; any other MKAutomationException is passed on
    unchanged.
    """
    domains = self._get_domains_needing_activation()

    if config.site_is_local(self._site_id):
        return execute_activate_changes(domains)

    try:
        response = cmk.gui.watolib.automations.do_remote_automation(
            config.site(self._site_id), "activate-changes", [
                ("domains", repr(domains)),
                ("site_id", self._site_id),
            ])
    except cmk.gui.watolib.automations.MKAutomationException as e:
        if "Invalid automation command: activate-changes" in "%s" % e:
            # The placeholder was never filled before, so the message showed a
            # literal "(%s)" instead of the actual error.
            raise MKGeneralException(
                "Activate changes failed (%s). The version of this site may be too old." % e)
        raise

    return response
def _push_snapshot_to_site(self):
    """Calls a remote automation call push-snapshot which is handled by AutomationPushSnapshot()

    The snapshot is uploaded to automation.py of the site and the textual
    response is parsed as a Python literal, which is returned.

    Raises MKAutomationException if the response is not a valid Python
    literal.
    """
    site = config.site(self._site_id)

    url = html.makeuri_contextless(
        [
            ("command", "push-snapshot"),
            ("secret", site["secret"]),
            ("siteid", site["id"]),
            ("debug", "1" if config.debug else ""),
        ],
        filename=site["multisiteurl"] + "automation.py",
    )

    response_text = self._upload_file(url, site.get('insecure', False))

    try:
        return ast.literal_eval(response_text)
    except (SyntaxError, ValueError, TypeError):
        # literal_eval() reports garbled input not only with SyntaxError:
        # text that parses as an expression but is no literal (e.g. a bare
        # word) raises ValueError, some malformed literals raise TypeError.
        raise cmk.gui.watolib.automations.MKAutomationException(
            _("Garbled automation response: <pre>%s</pre>") % (html.attrencode(response_text)))
def cmp(self, r1, r2):
    """Compare two rows by the alias of their site.

    Returns 1, 0 or -1 when the alias of ``r1`` is greater than, equal to or
    less than the alias of ``r2``.
    """
    alias1 = config.site(r1["site"])["alias"]
    alias2 = config.site(r2["site"])["alias"]
    is_greater = alias1 > alias2
    is_less = alias1 < alias2
    return is_greater - is_less
def render(self, what, row, tags, custom_vars):
    """Return the icon linking a mkevents active check to its events view.

    Only services whose check command starts with "check_mk_active-mkevents"
    are handled and only if the Event Console is enabled. The host (and
    optionally the application) is extracted from the arguments of the check
    command. Returns a tuple of icon name, title and URL, or None if the icon
    is not applicable or the command line can not be interpreted.
    """
    if not config.mkeventd_enabled:
        return

    # show for services based on the mkevents active check
    command = row[what + '_check_command']
    if not (what == 'service' and command.startswith('check_mk_active-mkevents')):
        return

    # Split command by the parts (COMMAND!ARG0!...) Beware: Do not split by escaped exclamation mark.
    command_parts = re.split(r'(?<!\\)!', command)

    # All arguments are space separated in ARG0
    if len(command_parts) != 2:
        return

    # Extract parameters from check_command:
    # TODO: Use better argument string splitting (shlex.split())
    args = command_parts[1].split()
    if not args:
        return

    # Handle -a and -H options. Sorry for the hack. We currently
    # have no better idea
    if len(args) >= 2 and args[0] == '-H':
        args = args[2:]  # skip two arguments
    if args and args[0] == '-a':
        args = args[1:]

    # The first remaining argument names the host, either literally or as
    # one of the two macros that are resolved from the row.
    host = None
    if args:
        macro_columns = {'$HOSTNAME$': 'host_name', '$HOSTADDRESS$': 'host_address'}
        column = macro_columns.get(args[0])
        host = row[column] if column else args[0]

    # If we have no host then the command line from the check_command seems
    # to be garbled. Better show nothing in this case.
    if not host:
        return

    # It is possible to have a central event console, this is the default case.
    # Another possible architecture is to have an event console in each site in
    # a distributed environment. For the later case the base url need to be
    # constructed here
    url_prefix = ''
    if getattr(config, 'mkeventd_distributed', False):
        url_prefix = config.site(row["site"])['url_prefix'] + 'check_mk/'

    url_vars = [
        ("view_name", "ec_events_of_monhost"),
        ("site", row["site"]),
        ("host", row["host_name"]),
    ]

    title = _('Events of Host %s') % (row["host_name"])

    if len(args) >= 2:
        # Strip the quoting and undo the escaping of backslash and exclamation mark
        app = args[1].strip('\'').replace("\\\\", "\\").replace("\\!", "!")
        title = _('Events of Application "%s" on Host %s') % (app, host)
        url_vars.append(("event_application", app))

    url = 'view.py?' + html.urlencode_vars(url_vars)

    return 'mkeventd', title, url_prefix + url
def cmp(self, r1, r2):
    """Compare two rows by the alias of their site.

    Returns 1, 0 or -1 when the alias of ``r1`` is greater than, equal to or
    less than the alias of ``r2``.
    """
    alias1 = config.site(r1["site"])["alias"]
    alias2 = config.site(r2["site"])["alias"]
    # The builtin cmp() only exists in Python 2. This expression yields the
    # same -1/0/1 result on Python 2 and 3.
    return (alias1 > alias2) - (alias1 < alias2)
def filter_cre_heading_info():
    """Return the alias of the site selected via the "site" request variable.

    Returns None if the variable is not set or empty.
    """
    current_value = html.request.var("site")
    if not current_value:
        return None
    return config.site(current_value)["alias"]
def filter_cre_choices():
    """Return (site id, alias) pairs of all online sites.

    The list is sorted case-insensitively by alias.
    """
    online_sites = []
    for sitename, state in sites.states().items():
        if state["state"] != "online":
            continue
        online_sites.append((sitename, config.site(sitename)["alias"]))

    online_sites.sort(key=lambda entry: entry[1].lower())
    return online_sites
def execute_network_scan_job() -> None:
    """Run the network scan for the next folder that is due.

    Nothing is done on WATO slave sites or when no folder needs scanning. The
    scan runs in the context of the "run_as" user of the folder's network
    scan configuration, locally or via the "network-scan" remote automation
    depending on the folder's site. Found hosts are added to the folder and
    the outcome (or the exception text) is stored as the scan result.

    Raises MKGeneralException if the configured "run_as" user does not exist.
    """
    init_wato_datastructures(with_wato_lock=True)

    if watolib.is_wato_slave_site():
        return  # Don't execute this job on slaves.

    folder = find_folder_to_scan()
    if not folder:
        return  # Nothing to do.

    # We need to have the context of the user. The jobs are executed when
    # config.set_user_by_id() has not been executed yet. So there is no user context
    # available. Use the run_as attribute from the job config and revert
    # the previous state after completion.
    old_user = config.user.id
    run_as = folder.attribute("network_scan")["run_as"]
    if not userdb.user_exists(run_as):
        raise MKGeneralException(
            _("The user %s used by the network scan of the folder %s does not exist.") %
            (run_as, folder.title()))
    config.set_user_by_id(folder.attribute("network_scan")["run_as"])

    result: NetworkScanResult = {
        "start": time.time(),
        "end": True,  # means currently running
        "state": None,
        "output": "The scan is currently running.",
    }

    # Mark the scan in progress: Is important in case the request takes longer than
    # the interval of the cron job (1 minute). Otherwise the scan might be started
    # a second time before the first one finished.
    save_network_scan_result(folder, result)

    try:
        if not config.site_is_local(folder.site_id()):
            found = watolib.do_remote_automation(
                config.site(folder.site_id()),
                "network-scan",
                [("folder", folder.path())],
            )
        else:
            found = cmk.gui.watolib.network_scan.do_network_scan(folder)

        if not isinstance(found, list):
            raise MKGeneralException(_("Received an invalid network scan result: %r") % found)

        add_scanned_hosts_to_folder(folder, found)

        success_output = _("The network scan found %d new hosts.") % len(found)
        result["state"] = True
        result["output"] = success_output

    except Exception as e:
        failure_output = _("An exception occured: %s") % e
        result["state"] = False
        result["output"] = failure_output
        logger.error("Exception in network scan:\n%s", traceback.format_exc())

    result["end"] = time.time()

    save_network_scan_result(folder, result)

    if old_user:
        config.set_user_by_id(old_user)
def execute(self):
    """Run the check for every configured site.

    Each result produced by _check_site() is tagged with the site it belongs
    to before it is yielded.
    """
    for current_site in config.sitenames():
        for finding in self._check_site(current_site, config.site(current_site)):
            finding.site_id = current_site
            yield finding
def user_sync_config() -> UserSyncConfig:
    """Return the user synchronization setting of the local site.

    The "user_sync" entry of the site configuration is used if present,
    otherwise the default derived from the global option.
    """
    local_site_id = config.omd_site()

    # use global option as default for reading legacy options and on remote site
    # for reading the value set by the WATO master site
    default_cfg = user_sync_default_config(local_site_id)

    site_spec = config.site(local_site_id)
    return site_spec.get("user_sync", default_cfg)