def _ticket_operation(operation, env, ticket, site_ip):
    if not env.is_cib_live:
        raise LibraryError(reports.live_environment_required(["CIB"]))

    if not site_ip:
        site_ip_list = resource.find_bound_ip(
            get_resources(env.get_cib()),
            get_config_file_name(env.booth.name)
        )
        if len(site_ip_list) != 1:
            raise LibraryError(
                booth_reports.booth_cannot_determine_local_site_ip()
            )
        site_ip = site_ip_list[0]

    stdout, stderr, return_code = env.cmd_runner().run([
        settings.booth_binary, operation,
        "-s", site_ip,
        ticket
    ])

    if return_code != 0:
        raise LibraryError(
            booth_reports.booth_ticket_operation_failed(
                operation,
                join_multilines([stderr, stdout]),
                site_ip,
                ticket
            )
        )

def command_expect_live_env(self):
    if not self.__config.is_live:
        raise LibraryError(
            common_reports.live_environment_required([
                "BOOTH_CONF",
                "BOOTH_KEY",
            ])
        )

def command_expect_live_env(self):
    if not self.__config.is_live:
        raise LibraryError(
            common_reports.live_environment_required([
                "--booth-conf",
                "--booth-key",
            ])
        )

def node_clear(env, node_name, allow_clear_cluster_node=False):
    """
    Remove specified node from various cluster caches.

    LibraryEnvironment env -- provides communication with external tools
    string node_name -- name of the node to clear
    bool allow_clear_cluster_node -- allow clearing the node even if it is
        still in a cluster
    """
    mocked_envs = []
    if not env.is_cib_live:
        mocked_envs.append("CIB")
    if not env.is_corosync_conf_live:
        mocked_envs.append("COROSYNC_CONF")
    if mocked_envs:
        raise LibraryError(reports.live_environment_required(mocked_envs))

    current_nodes = get_nodes(env.get_corosync_conf(), env.get_cib())
    if (
        node_addresses_contain_name(current_nodes, node_name)
        or
        node_addresses_contain_host(current_nodes, node_name)
    ):
        env.report_processor.process(
            reports.get_problem_creator(
                report_codes.FORCE_CLEAR_CLUSTER_NODE,
                allow_clear_cluster_node
            )(
                reports.node_to_clear_is_still_in_cluster,
                node_name
            )
        )

    remove_node(env.cmd_runner(), node_name)

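# Illustrative usage (not part of the original source): assuming `env` is an
# initialized LibraryEnvironment, clear a stale node even though it is still
# listed in the cluster:
#
#   node_clear(env, "old-node-name", allow_clear_cluster_node=True)
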
def disable_safe(env, resource_ids, strict, wait):
    """
    Disallow specified resources to be started by the cluster only if there is
    no effect on other resources

    LibraryEnvironment env
    strings resource_ids -- ids of the resources to be disabled
    bool strict -- if False, allow resources to be migrated
    mixed wait -- False: no wait, None: wait default timeout, int: wait timeout
    """
    if not env.is_cib_live:
        raise LibraryError(
            reports.live_environment_required([file_type_codes.CIB])
        )

    with resource_environment(
        env, wait, resource_ids, _ensure_disabled_after_wait(True)
    ) as resources_section:
        id_provider = IdProvider(resources_section)
        resource_el_list = _find_resources_or_raise(
            resources_section, resource_ids
        )
        env.report_processor.process_list(
            _resource_list_enable_disable(
                resource_el_list,
                resource.common.disable,
                id_provider,
                env.get_cluster_state()
            )
        )

        inner_resources_names_set = set()
        for resource_el in resource_el_list:
            inner_resources_names_set.update({
                inner_resource_el.get("id")
                for inner_resource_el
                in resource.common.get_all_inner_resources(resource_el)
            })

        plaintext_status, transitions, dummy_cib = simulate_cib(
            env.cmd_runner(), get_root(resources_section)
        )
        simulated_operations = (
            simulate_tools.get_operations_from_transitions(transitions)
        )
        other_affected = set()
        if strict:
            other_affected = set(
                simulate_tools.get_resources_from_operations(
                    simulated_operations, exclude=resource_ids
                )
            )
        else:
            other_affected = set(
                simulate_tools.get_resources_left_stopped(
                    simulated_operations, exclude=resource_ids
                )
                +
                simulate_tools.get_resources_left_demoted(
                    simulated_operations, exclude=resource_ids
                )
            )

        # Stopping a clone stops all its inner resources. That should not
        # block stopping the clone.
        other_affected = other_affected - inner_resources_names_set

        if other_affected:
            raise LibraryError(
                reports.resource_disable_affects_other_resources(
                    resource_ids,
                    other_affected,
                    plaintext_status,
                )
            )

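# Illustrative usage (not part of the original source): disable two resources
# only if nothing else is affected at all (strict mode), waiting up to 30
# seconds for the cluster to settle; `env` is assumed to be an initialized
# LibraryEnvironment.
#
#   disable_safe(env, ["vip", "webserver"], strict=True, wait=30)
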
def _ensure_live_env(env):
    not_live = []
    if not env.is_cib_live:
        not_live.append("CIB")
    if not env.is_corosync_conf_live:
        not_live.append("COROSYNC_CONF")
    if not_live:
        raise LibraryError(reports.live_environment_required(not_live))

def _ensure_live_env(env, booth_env):
    not_live = (
        booth_env.ghost_file_codes
        +
        # the parentheses are crucial, otherwise the if..else would apply to
        # booth_env.ghost_file_codes as well
        ([file_type_codes.CIB] if not env.is_cib_live else [])
    )
    if not_live:
        raise LibraryError(reports.live_environment_required(not_live))

def config_sync(
    env: LibraryEnvironment,
    instance_name=None,
    skip_offline_nodes=False,
):
    """
    Send specified local booth configuration to all nodes in the local cluster.

    env
    string instance_name -- booth instance name
    skip_offline_nodes -- if True, offline nodes will be skipped
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    if not env.is_cib_live:
        raise LibraryError(
            reports.live_environment_required([file_type_codes.CIB])
        )
    cluster_nodes_names, report_list = get_existing_nodes_names(
        env.get_corosync_conf()
    )
    if not cluster_nodes_names:
        report_list.append(reports.corosync_config_no_nodes_defined())
    report_processor.report_list(report_list)

    try:
        booth_conf_data = booth_env.config.read_raw()
        booth_conf = booth_env.config.raw_to_facade(booth_conf_data)
        if isinstance(booth_env.config.raw_file, GhostFile):
            authfile_data = booth_env.key.read_raw()
            authfile_path = booth_conf.get_authfile()
            authfile_name = (
                os.path.basename(authfile_path) if authfile_path else None
            )
        else:
            authfile_name, authfile_data, authfile_report_list = (
                config_files.get_authfile_name_and_data(booth_conf)
            )
            report_processor.report_list(authfile_report_list)
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))
    except ParserErrorException as e:
        report_processor.report_list(
            booth_env.config.parser_exception_to_report_list(e)
        )
    if report_processor.has_errors:
        raise LibraryError()

    com_cmd = BoothSendConfig(
        env.report_processor,
        booth_env.instance_name,
        booth_conf_data,
        authfile=authfile_name,
        authfile_data=authfile_data,
        skip_offline_targets=skip_offline_nodes,
    )
    com_cmd.set_targets(
        env.get_node_target_factory().get_target_list(
            cluster_nodes_names,
            skip_non_existing=skip_offline_nodes,
        )
    )
    run_and_raise(env.get_node_communicator(), com_cmd)

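# Illustrative usage (not part of the original source): push the default booth
# instance configuration to all cluster nodes, tolerating offline nodes; `env`
# is assumed to be an initialized LibraryEnvironment.
#
#   config_sync(env, skip_offline_nodes=True)
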
def _ensure_consistently_live_env(env):
    if env.is_cib_live and env.is_corosync_conf_live:
        return

    # we accept this as well, we need it for tests
    if not env.is_cib_live and not env.is_corosync_conf_live:
        return

    raise LibraryError(
        reports.live_environment_required(
            ["CIB" if not env.is_cib_live else "COROSYNC_CONF"]
        )
    )

def _get_nodes_to_validate_against(env, tree):
    if not env.is_corosync_conf_live and env.is_cib_live:
        raise LibraryError(
            reports.live_environment_required(["COROSYNC_CONF"])
        )

    if not env.is_cib_live and env.is_corosync_conf_live:
        # we do not try to get corosync.conf from live cluster when cib is not
        # taken from live cluster
        return get_existing_nodes_names_addrs(cib=tree)

    return get_existing_nodes_names_addrs(env.get_corosync_conf(), cib=tree)

def _get_nodes_to_validate_against(env, tree):
    if not env.is_corosync_conf_live and env.is_cib_live:
        raise LibraryError(
            reports.live_environment_required(["COROSYNC_CONF"])
        )

    if not env.is_cib_live and env.is_corosync_conf_live:
        # we do not try to get corosync.conf from live cluster when cib is not
        # taken from live cluster
        return get_existing_nodes_names_addrs(cib=tree)

    return get_existing_nodes_names_addrs(env.get_corosync_conf(), cib=tree)

def config_destroy(env, ignore_config_load_problems=False):
    env.booth.command_expect_live_env()
    if not env.is_cib_live:
        raise LibraryError(reports.live_environment_required(["CIB"]))

    name = env.booth.name
    config_is_used = partial(booth_reports.booth_config_is_used, name)

    report_list = []

    if resource.find_for_config(
        get_resources(env.get_cib()),
        get_config_file_name(name),
    ):
        report_list.append(config_is_used("in cluster resource"))

    # Only systemd is currently supported. Initd does not support multiple
    # instances (here specified by name).
    if external.is_systemctl():
        if external.is_service_running(env.cmd_runner(), "booth", name):
            report_list.append(config_is_used("(running in systemd)"))

        if external.is_service_enabled(env.cmd_runner(), "booth", name):
            report_list.append(config_is_used("(enabled in systemd)"))

    if report_list:
        raise LibraryError(*report_list)

    authfile_path = None
    try:
        authfile_path = config_structure.get_authfile(
            parse(env.booth.get_config_content())
        )
    except LibraryError:
        if not ignore_config_load_problems:
            raise LibraryError(booth_reports.booth_cannot_identify_keyfile())

        # even if the config content was not received or is not valid, the
        # config still needs to be removed
        env.report_processor.process(
            booth_reports.booth_cannot_identify_keyfile(
                severity=ReportItemSeverity.WARNING
            )
        )

    if (
        authfile_path
        and
        os.path.dirname(authfile_path) == settings.booth_config_dir
    ):
        env.booth.set_key_path(authfile_path)
        env.booth.remove_key()

    env.booth.remove_config()

def disable_simulate(env, resource_ids):
    """
    Simulate disallowing specified resources to be started by the cluster

    LibraryEnvironment env
    strings resource_ids -- ids of the resources to be disabled
    """
    if not env.is_cib_live:
        raise LibraryError(
            reports.live_environment_required([file_type_codes.CIB])
        )

    resources_section = get_resources(env.get_cib())
    _disable_validate_and_edit_cib(env, resources_section, resource_ids)
    plaintext_status, dummy_transitions, dummy_cib = simulate_cib(
        env.cmd_runner(), get_root(resources_section)
    )
    return plaintext_status

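# Illustrative usage (not part of the original source): preview the cluster
# status that disabling a resource would produce, without changing the live
# CIB; `env` is assumed to be an initialized LibraryEnvironment.
#
#   status_text = disable_simulate(env, ["webserver"])
#   print(status_text)
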
def destroy(env: LibraryEnvironment, force_flags: Container[str] = ()) -> None:
    """
    Destroy disaster-recovery configuration on all sites
    """
    if env.ghost_file_codes:
        raise LibraryError(
            reports.live_environment_required(env.ghost_file_codes)
        )

    report_processor = SimpleReportProcessor(env.report_processor)
    skip_offline = report_codes.SKIP_OFFLINE_NODES in force_flags

    report_list, dr_config = _load_dr_config(env.get_dr_env().config)
    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    local_nodes, report_list = get_existing_nodes_names(
        env.get_corosync_conf()
    )
    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    remote_nodes: List[str] = []
    for conf_remote_site in dr_config.get_remote_site_list():
        remote_nodes.extend(conf_remote_site.node_name_list)

    target_factory = env.get_node_target_factory()
    report_list, targets = target_factory.get_target_list_with_reports(
        remote_nodes + local_nodes,
        skip_non_existing=skip_offline,
    )
    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    com_cmd = RemoveFilesWithoutForces(
        env.report_processor,
        {
            "pcs disaster-recovery config": {
                "type": "pcs_disaster_recovery_conf",
            },
        },
    )
    com_cmd.set_targets(targets)
    run_and_raise(env.get_node_communicator(), com_cmd)

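# Illustrative usage (not part of the original source): remove the
# disaster-recovery configuration from all sites while skipping offline nodes;
# `env` is assumed to be an initialized LibraryEnvironment.
#
#   destroy(env, force_flags=[report_codes.SKIP_OFFLINE_NODES])
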
def remove_nodes_from_cib(env, node_list):
    """
    Remove specified nodes from CIB. When pcmk is running, 'crm_node -R <node>'
    will be used. Otherwise nodes will be removed directly from the CIB file.

    env LibraryEnvironment
    node_list iterable -- names of nodes to remove
    """
    # TODO: more advanced error handling
    # TODO: Tests

    if not env.is_cib_live:
        raise LibraryError(reports.live_environment_required(["CIB"]))

    if is_service_running(env.cmd_runner(), "pacemaker"):
        for node in node_list:
            # this may raise a LibraryError
            # NOTE: crm_node cannot remove multiple nodes at once
            remove_node(env.cmd_runner(), node)
        return

    # TODO: We need to remove nodes from the CIB file. We don't want to do it
    # using environment as this is a special case in which we have to edit CIB
    # file directly.
    for node in node_list:
        stdout, stderr, retval = env.cmd_runner().run(
            [
                settings.cibadmin,
                "--delete-all",
                "--force",
                f"--xpath=/cib/configuration/nodes/node[@uname='{node}']",
            ],
            env_extend={"CIB_file": os.path.join(settings.cib_dir, "cib.xml")}
        )
        if retval != 0:
            raise LibraryError(
                reports.node_remove_in_pacemaker_failed(
                    [node], reason=join_multilines([stderr, stdout])
                )
            )

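# Illustrative usage (not part of the original source): drop two nodes from
# the CIB; `env` is assumed to be an initialized LibraryEnvironment.
#
#   remove_nodes_from_cib(env, ["node2", "node3"])
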
def disable_safe(env, resource_ids, strict, wait):
    """
    Disallow specified resources to be started by the cluster only if there is
    no effect on other resources

    LibraryEnvironment env
    strings resource_ids -- ids of the resources to be disabled
    bool strict -- if False, allow resources to be migrated
    mixed wait -- False: no wait, None: wait default timeout, int: wait timeout
    """
    if not env.is_cib_live:
        raise LibraryError(
            reports.live_environment_required([file_type_codes.CIB])
        )

    with resource_environment(
        env, wait, resource_ids, _ensure_disabled_after_wait(True)
    ) as resources_section:
        _disable_validate_and_edit_cib(env, resources_section, resource_ids)
        plaintext_status, transitions, dummy_cib = simulate_cib(
            env.cmd_runner(), get_root(resources_section)
        )
        simulated_operations = (
            simulate_tools.get_operations_from_transitions(transitions)
        )
        other_affected = set()
        if strict:
            other_affected = set(
                simulate_tools.get_resources_from_operations(
                    simulated_operations, exclude=resource_ids
                )
            )
        else:
            other_affected = set(
                simulate_tools.get_resources_left_stopped(
                    simulated_operations, exclude=resource_ids
                )
                +
                simulate_tools.get_resources_left_demoted(
                    simulated_operations, exclude=resource_ids
                )
            )
        if other_affected:
            raise LibraryError(
                reports.resource_disable_affects_other_resources(
                    resource_ids,
                    other_affected,
                    plaintext_status,
                )
            )

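# Illustrative usage (not part of the original source): in non-strict mode the
# check only rejects the operation if other resources end up stopped or
# demoted; here with no wait; `env` is assumed to be an initialized
# LibraryEnvironment.
#
#   disable_safe(env, ["galera-clone"], strict=False, wait=False)
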
def command_expect_live_corosync_env(self):
    # TODO get rid of cli knowledge
    if not self.is_corosync_conf_live:
        raise LibraryError(
            reports.live_environment_required(["--corosync_conf"])
        )

def command_expect_live_corosync_env(self):
    if not self.is_corosync_conf_live:
        raise LibraryError(
            reports.live_environment_required(["COROSYNC_CONF"])
        )

def set_recovery_site(env: LibraryEnvironment, node_name: str) -> None:
    """
    Set up disaster recovery with the local cluster being the primary site

    env
    node_name -- a known host from the recovery site
    """
    if env.ghost_file_codes:
        raise LibraryError(
            reports.live_environment_required(env.ghost_file_codes)
        )
    report_processor = SimpleReportProcessor(env.report_processor)
    dr_env = env.get_dr_env()
    if dr_env.config.raw_file.exists():
        report_processor.report(reports.dr_config_already_exist())
    target_factory = env.get_node_target_factory()

    local_nodes, report_list = get_existing_nodes_names(
        env.get_corosync_conf(),
        error_on_missing_name=True
    )
    report_processor.report_list(report_list)

    if node_name in local_nodes:
        report_processor.report(reports.node_in_local_cluster(node_name))

    report_list, local_targets = target_factory.get_target_list_with_reports(
        local_nodes, allow_skip=False, report_none_host_found=False
    )
    report_processor.report_list(report_list)

    report_list, remote_targets = target_factory.get_target_list_with_reports(
        [node_name], allow_skip=False, report_none_host_found=False
    )
    report_processor.report_list(report_list)

    if report_processor.has_errors:
        raise LibraryError()

    com_cmd = GetCorosyncConf(env.report_processor)
    com_cmd.set_targets(remote_targets)
    remote_cluster_nodes, report_list = get_existing_nodes_names(
        CorosyncConfigFacade.from_string(
            run_and_raise(env.get_node_communicator(), com_cmd)
        ),
        error_on_missing_name=True
    )
    if report_processor.report_list(report_list):
        raise LibraryError()

    # ensure we have tokens for all nodes of the remote cluster
    report_list, remote_targets = target_factory.get_target_list_with_reports(
        remote_cluster_nodes, allow_skip=False, report_none_host_found=False
    )
    if report_processor.report_list(report_list):
        raise LibraryError()

    dr_config_exporter = get_file_toolbox(
        file_type_codes.PCS_DR_CONFIG
    ).exporter

    # create a dr config for the remote cluster
    remote_dr_cfg = dr_env.create_facade(DrRole.RECOVERY)
    remote_dr_cfg.add_site(DrRole.PRIMARY, local_nodes)

    # send the config to all nodes of the remote cluster
    distribute_file_cmd = DistributeFilesWithoutForces(
        env.report_processor,
        node_communication_format.pcs_dr_config_file(
            dr_config_exporter.export(remote_dr_cfg.config)
        )
    )
    distribute_file_cmd.set_targets(remote_targets)
    run_and_raise(env.get_node_communicator(), distribute_file_cmd)

    # create a new dr config with the local cluster as the primary site
    local_dr_cfg = dr_env.create_facade(DrRole.PRIMARY)
    local_dr_cfg.add_site(DrRole.RECOVERY, remote_cluster_nodes)
    distribute_file_cmd = DistributeFilesWithoutForces(
        env.report_processor,
        node_communication_format.pcs_dr_config_file(
            dr_config_exporter.export(local_dr_cfg.config)
        )
    )
    distribute_file_cmd.set_targets(local_targets)
    run_and_raise(env.get_node_communicator(), distribute_file_cmd)

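# Illustrative usage (not part of the original source): designate a recovery
# site reachable through one of its known hosts; `env` is assumed to be an
# initialized LibraryEnvironment.
#
#   set_recovery_site(env, "recovery-site-node1")
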
def status_all_sites_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> List[Mapping[str, Any]]:
    """
    Return local site's and all remote sites' status as plaintext

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info
    """
    # The command does not provide an option to skip offline / unreachable /
    # misbehaving nodes.
    # The point of such skipping is to stop a command if it is unable to make
    # changes on all nodes. The user can then decide to proceed anyway and
    # make changes on the skipped nodes later manually.
    # This command only reads from nodes so it automatically asks other nodes
    # if one is offline / misbehaving.
    class SiteData():
        local: bool
        role: DrRole
        target_list: Iterable[RequestTarget]
        status_loaded: bool
        status_plaintext: str

        def __init__(self, local, role, target_list):
            self.local = local
            self.role = role
            self.target_list = target_list
            self.status_loaded = False
            self.status_plaintext = ""

    if env.ghost_file_codes:
        raise LibraryError(
            reports.live_environment_required(env.ghost_file_codes)
        )

    report_processor = SimpleReportProcessor(env.report_processor)
    report_list, dr_config = _load_dr_config(env.get_dr_env().config)
    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    site_data_list = []
    target_factory = env.get_node_target_factory()

    # get local nodes
    local_nodes, report_list = get_existing_nodes_names(
        env.get_corosync_conf()
    )
    report_processor.report_list(report_list)
    report_list, local_targets = target_factory.get_target_list_with_reports(
        local_nodes,
        skip_non_existing=True,
    )
    report_processor.report_list(report_list)
    site_data_list.append(SiteData(True, dr_config.local_role, local_targets))

    # get remote sites' nodes
    for conf_remote_site in dr_config.get_remote_site_list():
        report_list, remote_targets = (
            target_factory.get_target_list_with_reports(
                conf_remote_site.node_name_list,
                skip_non_existing=True,
            )
        )
        report_processor.report_list(report_list)
        site_data_list.append(
            SiteData(False, conf_remote_site.role, remote_targets)
        )
    if report_processor.has_errors:
        raise LibraryError()

    # get all statuses
    for site_data in site_data_list:
        com_cmd = GetFullClusterStatusPlaintext(
            report_processor,
            hide_inactive_resources=hide_inactive_resources,
            verbose=verbose,
        )
        com_cmd.set_targets(site_data.target_list)
        site_data.status_loaded, site_data.status_plaintext = run_com_cmd(
            env.get_node_communicator(), com_cmd
        )

    return [
        DrSiteStatusDto(
            site_data.local,
            site_data.role,
            site_data.status_plaintext,
            site_data.status_loaded,
        ).to_dict()
        for site_data in site_data_list
    ]

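# Illustrative usage (not part of the original source): collect the plaintext
# status of the local site and all remote sites; `env` is assumed to be an
# initialized LibraryEnvironment.
#
#   for site_status in status_all_sites_plaintext(env, verbose=True):
#       print(site_status)
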
def command_expect_live_env(self):
    if not self.__config.is_live:
        raise LibraryError(
            common_reports.live_environment_required(
                ["--booth-conf", "--booth-key"]
            )
        )

def _ensure_live_booth_env(booth_env):
    if booth_env.ghost_file_codes:
        raise LibraryError(
            reports.live_environment_required(booth_env.ghost_file_codes)
        )

def command_expect_live_corosync_env(self):
    if not self.is_corosync_conf_live:
        raise LibraryError(reports.live_environment_required([
            "--corosync_conf"
        ]))

def command_expect_live_corosync_env(self):
    # TODO get rid of cli knowledge
    if not self.is_corosync_conf_live:
        raise LibraryError(reports.live_environment_required([
            "--corosync_conf"
        ]))

def command_expect_live_env(self):
    if not self.__config.is_live:
        raise LibraryError(common_reports.live_environment_required([
            "BOOTH_CONF",
            "BOOTH_KEY",
        ]))