def _synchronize_profiles_to_sites(logger, profiles_to_synchronize):
    if not profiles_to_synchronize:
        return

    remote_sites = [(site_id, get_site_config(site_id)) for site_id in get_login_slave_sites()]

    logger.info('Credentials changed for %s. Trying to sync to %d sites' %
                (", ".join(profiles_to_synchronize.keys()), len(remote_sites)))

    states = sites.states()

    pool = ThreadPool()
    jobs = []
    for site_id, site in remote_sites:
        jobs.append(
            pool.apply_async(_sychronize_profile_worker,
                             (states, site_id, site, profiles_to_synchronize)))

    results = []
    start_time = time.time()
    while time.time() - start_time < 30:
        for job in jobs[:]:
            try:
                results.append(job.get(timeout=0.5))
                jobs.remove(job)
            except mp_TimeoutError:
                pass
        if not jobs:
            break

    contacted_sites = {x[0] for x in remote_sites}
    working_sites = {result.site_id for result in results}
    for site_id in contacted_sites - working_sites:
        results.append(
            SynchronizationResult(site_id,
                                  error_text=_("No response from update thread"),
                                  failed=True))

    for result in results:
        if result.error_text:
            logger.info('  FAILED [%s]: %s' % (result.site_id, result.error_text))
            if config.wato_enabled:
                add_change("edit-users",
                           _('Password changed (sync failed: %s)') % result.error_text,
                           add_user=False,
                           sites=[result.site_id],
                           need_restart=False)

    pool.terminate()
    pool.join()

    num_failed = sum(1 for result in results if result.failed)
    num_disabled = sum(1 for result in results if result.disabled)
    num_succeeded = sum(1 for result in results if result.succeeded)
    logger.info('  Disabled: %d, Succeeded: %d, Failed: %d' %
                (num_disabled, num_succeeded, num_failed))
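# A minimal standalone sketch of the collection pattern above: fan work out to
# a ThreadPool, then repeatedly poll each AsyncResult with a short timeout
# until everything has finished or an overall deadline expires. The worker and
# the inputs are illustrative stand-ins, not part of this module.

def _example_deadline_poll() -> list:
    import time
    from multiprocessing import TimeoutError as mp_TimeoutError
    from multiprocessing.pool import ThreadPool

    def _demo_worker(site_id: str) -> str:
        time.sleep(0.1)  # simulate a slow remote HTTP call
        return site_id

    pool = ThreadPool()
    jobs = [pool.apply_async(_demo_worker, (s,)) for s in ("site1", "site2")]
    results: list = []
    deadline = time.time() + 30
    while jobs and time.time() < deadline:
        for job in jobs[:]:
            try:
                results.append(job.get(timeout=0.5))
                jobs.remove(job)
            except mp_TimeoutError:
                pass  # not finished yet; poll again on the next pass
    pool.terminate()
    pool.join()
    return results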
def page(self):
    ajax_request = self.webapi_request()

    site_id_val = ajax_request.get("site")
    if not site_id_val:
        raise MKUserError(None, "The site_id is missing")
    site_id = site_id_val
    if site_id not in sitenames():
        raise MKUserError(None, _("The requested site does not exist"))

    status = (cmk.gui.sites.states().get(site_id,
                                         cmk.gui.sites.SiteStatus({})).get("state", "unknown"))
    if status == "dead":
        raise MKGeneralException(_("The site is marked as dead. Not trying to replicate."))

    site = get_site_config(site_id)
    assert user.id is not None
    result = self._synchronize_profile(site_id, site, user.id)

    if result is not True:
        assert result is not False
        _add_profile_replication_change(site_id, result)
        raise MKGeneralException(result)

    return _("Replication completed successfully.")
def _do_check_mk_remote_automation_in_background_job(
        site_id: SiteId, automation_request: CheckmkAutomationRequest) -> Any:
    """Execute the automation in a background job on the remote site

    It starts the background job using one call. It then polls the remote
    site, waiting for completion of the job."""
    site_config = get_site_config(site_id)

    job_id = _start_remote_automation_job(site_config, automation_request)

    auto_logger.info("Waiting for job completion")
    result = None
    while True:
        raw_response = do_remote_automation(site_config,
                                            "checkmk-remote-automation-get-status", [
                                                ("request", repr(job_id)),
                                            ])
        response = CheckmkAutomationGetStatusResponse(*raw_response)
        auto_logger.debug("Job status: %r", response)

        if not response.job_status["is_active"]:
            result = response.result
            auto_logger.debug("Job is not active anymore. Return the result: %s", result)
            break

    return result
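# Hypothetical call site for the helper above. The request fields follow the
# CheckmkAutomationRequest(command, args, indata, stdin_data, timeout)
# constructor used elsewhere in this module; the concrete command, arguments
# and site id are illustrative only:
#
#     req = CheckmkAutomationRequest("analyse-service", ["myhost", "CPU load"],
#                                    None, None, 60)
#     result = _do_check_mk_remote_automation_in_background_job(
#         SiteId("remote1"), req)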
def filter_cre_choices():
    return sorted(
        [(sitename, get_site_config(sitename)["alias"])
         for sitename, state in sites.states().items()
         if state["state"] == "online"],
        key=lambda a: a[1].lower(),
    )
def check_mk_remote_automation_serialized(
    *,
    site_id: SiteId,
    command: str,
    args: Optional[Sequence[str]],
    indata: Any,
    stdin_data: Optional[str] = None,
    timeout: Optional[int] = None,
    sync: bool = True,
    non_blocking_http: bool = False,
) -> SerializedResult:
    site = get_site_config(site_id)
    if "secret" not in site:
        raise MKGeneralException(
            _('Cannot connect to site "%s": The site is not logged in') %
            site.get("alias", site_id))

    if not site.get("replication"):
        raise MKGeneralException(
            _('Cannot connect to site "%s": The replication is disabled') %
            site.get("alias", site_id))

    if sync:
        sync_changes_before_remote_automation(site_id)

    if non_blocking_http:
        # This will start a background job process on the remote site to execute the
        # automation asynchronously. It then polls the remote site, waiting for
        # completion of the job.
        return _do_check_mk_remote_automation_in_background_job_serialized(
            site_id, CheckmkAutomationRequest(command, args, indata, stdin_data, timeout))

    # Synchronous execution of the actual remote command in a single blocking HTTP request
    return SerializedResult(
        _do_remote_automation_serialized(
            site=get_site_config(site_id),
            command="checkmk-automation",
            vars_=[
                ("automation", command),  # The Checkmk automation command
                ("arguments", mk_repr(args)),  # The arguments for the command
                ("indata", mk_repr(indata)),  # The input data
                ("stdin_data", mk_repr(stdin_data)),  # The input data for stdin
                ("timeout", mk_repr(timeout)),  # The timeout
            ],
        ))
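# Hypothetical call sites for the two execution modes above (the site id and
# command names are illustrative, not taken from this module):
#
#     # Short command, executed in a single blocking HTTP request:
#     check_mk_remote_automation_serialized(
#         site_id=SiteId("remote1"), command="get-check-information",
#         args=None, indata=None)
#
#     # Long-running command, dispatched to a remote background job and polled:
#     check_mk_remote_automation_serialized(
#         site_id=SiteId("remote1"), command="service-discovery-job",
#         args=None, indata=None, timeout=110, non_blocking_http=True)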
def cre_sites_options() -> Options:
    return sorted(
        [(sitename, sites.get_site_config(sitename)["alias"])
         for sitename, state in sites.states().items()
         if state["state"] == "online"],
        key=lambda a: a[1].lower(),
    )
def _get_diagnostics_dump_file(self, site: str, tarfile_name: str) -> bytes:
    if site_is_local(site):
        return _get_diagnostics_dump_file(tarfile_name)

    return do_remote_automation(get_site_config(site), "diagnostics-dump-get-file", [
        ("tarfile_name", tarfile_name),
    ])
def _get_agent_output_file(self) -> bytes:
    if site_is_local(self._request.host.site_id()):
        return get_fetch_agent_output_file(self._request)

    return watolib.do_remote_automation(
        get_site_config(self._request.host.site_id()), "fetch-agent-output-get-file", [
            ("request", repr(self._request.serialize())),
        ])
def _get_job_status(self) -> Dict:
    if site_is_local(self._request.host.site_id()):
        return get_fetch_agent_job_status(self._request)

    return watolib.do_remote_automation(
        get_site_config(self._request.host.site_id()), "fetch-agent-output-get-status", [
            ("request", repr(self._request.serialize())),
        ])
def _execute_remote_automation(self, request):
    if request["site_id"] not in sitenames():
        raise MKUserError("site_id", _("This site does not exist."))

    if request["site_id"] not in wato_slave_sites():
        raise MKUserError("site_id", _("This site is not a distributed WATO site."))

    return cmk.gui.watolib.automations.do_remote_automation(
        get_site_config(request["site_id"]),
        request["command"],
        request["command_args"],
    )
def execute_host_label_sync(host_name: HostName, site_id: SiteId) -> None:
    """Contacts the given remote site to synchronize the labels of the given host"""
    site_spec = get_site_config(site_id)
    result = _execute_site_sync(
        site_id, site_spec,
        SiteRequest(
            newest_host_labels=0.0,
            enforce_host=EnforcedHostRequest(site_id, host_name),
        ))
    save_updated_host_label_files(result.updated_host_labels)
def _start_fetch(self) -> None:
    """Start the job on the site the host is monitored by"""
    if site_is_local(self._request.host.site_id()):
        start_fetch_agent_job(self._request)
        return

    watolib.do_remote_automation(
        get_site_config(self._request.host.site_id()), "fetch-agent-output-start", [
            ("request", repr(self._request.serialize())),
        ])
def _get_check_table_from_remote(api_request):
    """Gathers the check table from a remote site

    Cares about pre 1.6 sites that do not support the new service-discovery-job
    API call. Falls back to the previously existing try-inventory and inventory
    automation calls.
    """
    try:
        sync_changes_before_remote_automation(api_request.host.site_id())

        return _deserialize_remote_result(
            watolib.do_remote_automation(
                get_site_config(api_request.host.site_id()), "service-discovery-job", [
                    ("host_name", api_request.host.name()),
                    ("options", json.dumps(api_request.options._asdict())),
                ]))
    except watolib.MKAutomationException as e:
        if "Invalid automation command: service-discovery-job" not in "%s" % e:
            raise

        # Compatibility for pre 1.6 remote sites.
        # TODO: Replace with helpful exception in 1.7.
        if api_request.options.action == DiscoveryAction.TABULA_RASA:
            _counts, _failed_hosts = check_mk_automation(
                api_request.host.site_id(), "inventory",
                ["@scan", "refresh", api_request.host.name()])

        if api_request.options.action == DiscoveryAction.REFRESH:
            options = ["@scan"]
        else:
            options = ["@noscan"]

        if not api_request.options.ignore_errors:
            options.append("@raiseerrors")

        options.append(api_request.host.name())
        check_table = check_mk_automation(api_request.host.site_id(), "try-inventory", options)

        return DiscoveryResult(
            job_status={
                "is_active": False,
                "state": JobStatusStates.INITIALIZED,
            },
            check_table=check_table,
            check_table_created=int(time.time()),
            host_labels={},
            new_labels={},
            vanished_labels={},
            changed_labels={},
        )
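# The try/except above is a version-compatibility fallback: probe the newer
# remote API first and, only when the remote side reports the command as
# unknown, retry with the legacy calls. A minimal standalone sketch of the
# pattern (the exception type and command names are illustrative stand-ins):

def _example_call_with_fallback(call_remote):
    """call_remote("new-command") may raise
    RuntimeError("Invalid automation command: new-command") on old remotes;
    in that case retry with the legacy command."""
    try:
        return call_remote("new-command")
    except RuntimeError as e:
        if "Invalid automation command: new-command" not in str(e):
            raise  # a genuine failure, not just an old remote
        return call_remote("legacy-command")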
def _perform_tests_for_site(
        self, site_id: SiteId,
        result_queue: "multiprocessing.Queue[Tuple[SiteId, str]]") -> None:
    self._logger.debug("[%s] Starting" % site_id)
    try:
        # Would be better to clean all open fds that are not needed, but we don't
        # know the FDs of the result_queue pipe. Can we find it out somehow?
        # Cleanup resources of the apache
        # for x in range(3, 256):
        #     try:
        #         os.close(x)
        #     except OSError, e:
        #         if e.errno == errno.EBADF:
        #             pass
        #         else:
        #             raise

        # Reinitialize logging targets
        log.init_logging()  # NOTE: We run in a subprocess!

        if site_is_local(site_id):
            automation = AutomationCheckAnalyzeConfig()
            results_data = automation.execute(automation.get_request())
        else:
            results_data = watolib.do_remote_automation(
                get_site_config(site_id),
                "check-analyze-config",
                [],
                timeout=request.request_timeout - 10,
            )

        self._logger.debug("[%s] Finished" % site_id)

        result = {
            "state": 0,
            "response": results_data,
        }
    except Exception:
        self._logger.exception("[%s] Failed" % site_id)
        result = {
            "state": 1,
            "response": "Traceback:<br>%s" % (traceback.format_exc().replace("\n", "<br>\n")),
        }
    finally:
        result_queue.put((site_id, repr(result)))
        result_queue.close()
        # multiprocessing.Queue has close() and join_thread(), but no join()
        # (that is JoinableQueue); calling join() here would raise AttributeError.
        result_queue.join_thread()
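# A minimal standalone sketch of the worker/queue pattern used above: each
# child process puts exactly one (key, payload) message on a
# multiprocessing.Queue, closes its write end and waits for the feeder thread
# before exiting. All names are illustrative; the sketch assumes the "fork"
# start method (Linux).

def _example_queue_worker(site_id: str, q: "multiprocessing.Queue") -> None:
    q.put((site_id, repr({"state": 0, "response": "ok"})))
    q.close()  # no more messages from this process
    q.join_thread()  # block until the feeder thread flushed the pipe

def _example_fan_out() -> dict:
    import multiprocessing

    q: "multiprocessing.Queue" = multiprocessing.Queue()
    procs = [
        multiprocessing.Process(target=_example_queue_worker, args=(s, q))
        for s in ("site1", "site2")
    ]
    for p in procs:
        p.start()
    results = dict(q.get() for _ in procs)  # exactly one message per child
    for p in procs:
        p.join()
    return results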
def show(self) -> None:
    html.open_table(cellspacing="0", class_="sitestate")

    sites.update_site_states_from_dead_sites()

    for sitename, _sitealias in sites.sorted_sites():
        site = sites.get_site_config(sitename)

        state = sites.states().get(sitename, sites.SiteStatus({})).get("state")

        if state is None:
            state = "missing"
            switch = "missing"
            text = escape_html_permissive(sitename)
        else:
            if state == "disabled":
                switch = "on"
                text = escape_html_permissive(site["alias"])
            else:
                switch = "off"
                text = render_link(site["alias"],
                                   "view.py?view_name=sitehosts&site=%s" % sitename)

        html.open_tr()
        html.td(text, class_="left")
        html.open_td(class_="state")

        if switch == "missing":
            html.status_label(content=state, status=state, title=_("Site is missing"))
        else:
            url = makeactionuri_contextless(
                request,
                transactions,
                [
                    ("_site_switch", "%s:%s" % (sitename, switch)),
                ],
                filename="switch_site.py",
            )
            html.status_label_button(
                content=state,
                status=state,
                title=_("enable this site") if state == "disabled" else _("disable this site"),
                onclick="cmk.sidebar.switch_site(%s)" % (json.dumps(url)),
            )

        html.close_tr()
    html.close_table()
def get_check_table(discovery_request: StartDiscoveryRequest) -> DiscoveryResult:
    """Gathers the check table using a background job

    Cares about handling local / remote sites using an automation call.
    In both cases the ServiceDiscoveryBackgroundJob is executed to care
    about collecting the check table asynchronously. In case of a remote
    site the chain is:

    Starting from central site:

    _get_check_table()
          |
          v
    automation service-discovery-job-discover
          |
          v
    to remote site
          |
          v
    AutomationServiceDiscoveryJob().execute()
          |
          v
    _get_check_table()
    """
    if discovery_request.options.action == DiscoveryAction.TABULA_RASA:
        watolib.add_service_change(
            discovery_request.host,
            "refresh-autochecks",
            _("Refreshed check configuration of host '%s'") % discovery_request.host.name(),
        )

    if site_is_local(discovery_request.host.site_id()):
        return execute_discovery_job(discovery_request)

    sync_changes_before_remote_automation(discovery_request.host.site_id())

    return DiscoveryResult.deserialize(
        watolib.do_remote_automation(
            get_site_config(discovery_request.host.site_id()), "service-discovery-job", [
                ("host_name", discovery_request.host.name()),
                ("options", json.dumps(discovery_request.options._asdict())),
            ]))
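# Hypothetical central-site invocation of the chain described in the docstring
# above. The StartDiscoveryRequest fields mirror how this module serializes
# them (host, folder, options), but the concrete values are illustrative:
#
#     discovery_result = get_check_table(
#         StartDiscoveryRequest(host=host, folder=host.folder(), options=options))
#     for entry in discovery_result.check_table:
#         ...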
def get_page_heading() -> str:
    if "%s" in config.page_heading:
        return config.page_heading % (get_site_config(omd_site()).get('alias', _("GUI")))
    return config.page_heading
def render(self, what, row, tags, custom_vars):
    if not config.mkeventd_enabled:
        return

    # show for services based on the mkevents active check
    command = row[what + "_check_command"]

    if what != "service" or not command.startswith("check_mk_active-mkevents"):
        return

    # Split the command into its parts (COMMAND!ARG0!...). Beware: Do not split
    # by escaped exclamation marks.
    splitted_command = re.split(r"(?<!\\)!", command)

    # All arguments are space separated in ARG0
    if len(splitted_command) != 2:
        return

    host = None
    app = None

    # Extract parameters from check_command
    args = shlex.split(splitted_command[1])
    if not args:
        return

    # Handle -a and -H options. Sorry for the hack. We currently
    # have no better idea
    if len(args) >= 2 and args[0] == "-H":
        args = args[2:]  # skip two arguments
    if len(args) >= 1 and args[0] == "-a":
        args = args[1:]

    if len(args) >= 1:
        host = _get_hostname(args, row)

    # If we have no host then the command line from the check_command seems
    # to be garbled. Better show nothing in this case.
    if not host:
        return

    # It is possible to have a central event console, this is the default case.
    # Another possible architecture is to have an event console in each site in
    # a distributed environment. For the latter case the base URL needs to be
    # constructed here
    url_prefix = ""
    if getattr(config, "mkeventd_distributed", False):
        site = get_site_config(row["site"])
        url_prefix = site["url_prefix"] + "check_mk/"

    url_vars = [
        ("view_name", "ec_events_of_monhost"),
        ("site", row["site"]),
        ("host", row["host_name"]),
    ]

    title = _("Events of Host %s") % (row["host_name"])

    if len(args) >= 2:
        app = args[1].strip("'").replace("\\\\", "\\").replace("\\!", "!")
        title = _('Events of Application "%s" on Host %s') % (app, host)
        url_vars.append(("event_application", app))

    url = "view.py?" + urlencode_vars(url_vars)

    return "mkeventd", title, url_prefix + url
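# A small standalone illustration of the parsing above: Nagios-style check
# commands join their arguments with "!", and the mkevents ARG0 is itself a
# space separated, shell-quoted option string. The sample command is made up.

def _example_parse_mkevents_command() -> None:
    import re
    import shlex

    command = r"check_mk_active-mkevents!'-H' 'mysite;myhost' 'my\!app'"
    parts = re.split(r"(?<!\\)!", command)  # "\!" is escaped, so it stays intact
    assert parts[0] == "check_mk_active-mkevents"
    args = shlex.split(parts[1])
    assert args == ["-H", "mysite;myhost", "my\\!app"]
    # .replace("\\!", "!") then yields the display value "my!app"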
def cmp(self, r1, r2):
    alias1 = get_site_config(r1["site"])["alias"]
    alias2 = get_site_config(r2["site"])["alias"]
    return (alias1 > alias2) - (alias1 < alias2)
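# (a > b) - (a < b) is the Python 3 substitute for the removed builtin cmp();
# it evaluates to -1, 0 or 1. A worked example with illustrative values:
#
#     a, b = "Alpha site", "Beta site"
#     (a > b) - (a < b)  # -> -1, "Alpha site" sorts before "Beta site"
#     (a > a) - (a < a)  # ->  0
#     (b > a) - (b < a)  # ->  1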
def execute_network_scan_job() -> None:
    """Executed by the multisite cron job once a minute. Is only executed in
    the central site. Finds the next folder to scan and starts it via WATO
    automation. The result is written to the folder in the master site."""
    init_wato_datastructures(with_wato_lock=True)

    if is_wato_slave_site():
        return  # Don't execute this job on slaves.

    folder = _find_folder_to_scan()
    if not folder:
        return  # Nothing to do.

    run_as = folder.attribute("network_scan")["run_as"]
    if not userdb.user_exists(run_as):
        raise MKGeneralException(
            _("The user %s used by the network "
              "scan of the folder %s does not exist.") % (run_as, folder.title()))

    with UserContext(run_as):
        result: NetworkScanResult = {
            "start": time.time(),
            "end": True,  # means currently running
            "state": None,
            "output": "The scan is currently running.",
        }

        # Mark the scan in progress: Is important in case the request takes longer than
        # the interval of the cron job (1 minute). Otherwise the scan might be started
        # a second time before the first one finished.
        _save_network_scan_result(folder, result)

        try:
            if site_is_local(folder.site_id()):
                found = _do_network_scan(folder)
            else:
                found = do_remote_automation(get_site_config(folder.site_id()), "network-scan",
                                             [("folder", folder.path())])

            if not isinstance(found, list):
                raise MKGeneralException(
                    _("Received an invalid network scan result: %r") % found)

            _add_scanned_hosts_to_folder(folder, found)

            result.update({
                "state": True,
                "output": _("The network scan found %d new hosts.") % len(found),
            })
        except Exception as e:
            result.update({
                "state": False,
                "output": _("An exception occurred: %s") % e,
            })
            logger.error("Exception in network scan:\n%s", traceback.format_exc())

        result["end"] = time.time()

        _save_network_scan_result(folder, result)
def filter_cre_heading_info(value: FilterHTTPVariables) -> Optional[str]:
    current_value = value.get("site")
    return get_site_config(current_value)["alias"] if current_value else None
def execute(self) -> Iterator[ACResult]:
    for site_id in sitenames():
        site_config = get_site_config(site_id)
        for result in self._check_site(site_id, site_config):
            result.site_id = site_id
            yield result
def user_sync_config() -> UserSyncConfig:
    # use global option as default for reading legacy options and on remote site
    # for reading the value set by the WATO master site
    default_cfg = user_sync_default_config(omd_site())
    return get_site_config(omd_site()).get("user_sync", default_cfg)