def generate_snmp_spec(self, spec):
    """
    Return spec content for snmp-destination.

    Args:
        spec (Dict): snmp-destination service spec config

    Returns:
        service_spec (Str)

    Example::

        specs:
          - service_type: snmp-destination
            spec:
              credentials:
                snmp_v3_auth_username: myadmin
                snmp_v3_auth_password: mypassword
    """
    template = self._get_template("snmp")

    destination_node = spec["spec"].pop("snmp_destination", None)
    if destination_node:
        node = get_node_by_id(self.cluster, destination_node)
        spec["spec"]["snmp_destination"] = self.get_addr(node) + ":162"

    # Fetch the cluster fsid and use it (without hyphens) as the SNMP engine id
    node_installer = get_node_by_id(self.cluster, "node1")
    cmd = "cephadm shell ceph fsid"
    out, err = node_installer.exec_command(sudo=True, cmd=cmd)
    LOG.info(f"fsid: {out}")
    engine_id = out.strip().replace("-", "")
    if engine_id:
        spec["spec"]["engine_id"] = engine_id

    return template.render(spec=spec)
def generate_host_spec(self, spec):
    """
    Return hosts spec content based on the host config.

    Args:
        spec (Dict): hosts specification

    Returns:
        hosts_spec (Str)

    Example::

        spec:
          - service_type: host
            address: true
            labels: apply-all-labels
            nodes:
              - node2
              - node3
    """
    template = self._get_template("host")
    hosts = []
    address = spec.get("address")
    labels = spec.get("labels")

    for node_name in spec["nodes"]:
        host = dict()
        node = get_node_by_id(self.cluster, node_name)
        host["hostname"] = self.get_hostname(node)

        if address:
            host["address"] = self.get_addr(node)

        if labels:
            host["labels"] = self.get_labels(node)

        hosts.append(host)

    return template.render(hosts=hosts)
def run(ceph_cluster, **kw):
    ansible_dir = "/usr/share/ceph-ansible"
    ceph_installer = ceph_cluster.get_ceph_object("installer")
    config = kw.get("config")
    build = config.get("build", config.get("rhbuild"))
    daemon_to_kill = config.get("daemon-to-kill")
    daemon = config.get("daemon")
    instance = config.get("instance")
    playbook = f"shrink-{daemon}.yml"
    short_names = []

    # For all daemons the node name is required, but for OSDs the osd.id is
    # required to shrink the daemon.
    if daemon != "osd":
        for node in daemon_to_kill:
            short_name = get_node_by_id(ceph_cluster, node).shortname
            short_names.append(short_name)
        node_name = ",".join(short_names)
        log.info(f"Executing {playbook} playbook to shrink {node_name} daemons")
    else:
        daemons_to_kill = ",".join(daemon_to_kill)
        log.info(f"Executing {playbook} playbook to shrink {daemons_to_kill} daemons")

    # Display inventory before shrinking
    check_inventory = f"sudo cat {ansible_dir}/hosts"
    outbuf, _ = ceph_installer.exec_command(cmd=check_inventory)
    log.info(f"Inventory {outbuf}")

    # Based on the RHCS version, use the appropriate playbook path
    if build.startswith("4"):
        playbook = f"infrastructure-playbooks/{playbook}"
    else:
        ceph_installer.exec_command(
            sudo=True,
            cmd=f"cd {ansible_dir};cp -R {ansible_dir}/infrastructure-playbooks/{playbook} .",
        )

    cmd = f"cd {ansible_dir}; ansible-playbook -vvvv -e ireallymeanit=yes {playbook}"

    # Add extra vars to the shrink playbook
    if daemon == "osd":
        cmd += f" -e {daemon}_to_kill={daemons_to_kill} -i hosts"
    elif daemon == "rgw":
        cmd += f" -e {daemon}_to_kill={node_name}.rgw{instance} -i hosts"
    else:
        cmd += f" -e {daemon}_to_kill={node_name} -i hosts"

    # Execute the shrink playbook for the provided daemon
    err = ceph_installer.exec_command(cmd=cmd, long_running=True)

    # If playbook execution fails, log the error and return the return code
    if err != 0:
        log.error(f"Failed during ansible playbook execution: {playbook}\n")
        return err

    return 0
def translate_to_ip(clusters, cluster_name: str, string: str) -> str:
    """
    Return the string after replacing the {node_ip:<node>} pattern with the IP
    address of <node>.

    In this method, the pattern {node_ip:<cluster>#<node>} is replaced with the
    value of node.ip_address from the given cluster.

    Args:
        clusters: Ceph cluster instances
        cluster_name: Name of the cluster under test.
        string: String that needs to be searched

    Return:
        String with node IDs replaced with IP addresses
    """
    replaced_string = string
    node_list = re.findall("{node_ip:(.+?)}", string)

    for node in node_list:
        node_ = node
        if "#" in node:
            cluster_name, node = node.split("#")

        node_ip = get_node_by_id(clusters[cluster_name], node).ip_address
        replacement_pattern = "{node_ip:" + node_ + "}"
        replaced_string = re.sub(replacement_pattern, node_ip, replaced_string)

    return replaced_string
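# Usage illustration (hypothetical cluster key and node id, not taken from the
# test suite): assuming clusters["ceph-rgw1"] contains a node matching "node2"
# with ip_address "10.0.0.12",
#   translate_to_ip(clusters, "ceph-rgw1", "rgw_pullhost: {node_ip:ceph-rgw1#node2}")
# returns "rgw_pullhost: 10.0.0.12". Without the "#" separator, the node is
# looked up in the cluster named by the cluster_name argument.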
def down_osd_with_umount(osd_id, ceph_cluster):
    """Bring an OSD down by un-mounting its device path.

    - Find the node where the OSD resides.
    - Stop the OSD service: "sudo systemctl stop ceph-osd@{id}.service".
    - Disable the daemon: "sudo systemctl disable ceph-osd@{id}.service".
    - Find the mount path using the ceph OSD id.
    - Unmount the OSD device path.
    - Validate that the mount has been removed.

    Args:
        osd_id: OSD id
        ceph_cluster: ceph cluster object
    """
    mon_node = ceph_cluster.get_ceph_object("mon")
    osd_info = find_osd_by_id(osd_id, mon_node)

    # find the OSD node
    osd_node = get_node_by_id(ceph_cluster, osd_info["host"])

    # Stop and disable the OSD daemon
    systemctl(osd_node, "stop", f"ceph-osd@{osd_id}")
    systemctl(osd_node, "disable", f"ceph-osd@{osd_id}")

    # umount the OSD device path
    osd_node.exec_command(
        cmd=f"umount /var/lib/ceph/osd/ceph-{osd_id}",
        sudo=True,
    )

    # Validate by listing the remaining ceph mounts
    osd_node.exec_command(
        cmd="mount | grep ceph",
        sudo=True,
    )
def run(ceph_cluster, **kwargs: Any) -> int:
    """
    Test module for setting up the SNMP destination node.

    Args:
        ceph_cluster: The participating Ceph cluster object
        kwargs: Supported key/value pairs for the key "config" are
            node | node on which the commands are to be executed
            cmd  | set of commands to be executed

    Returns:
        0 - on success
        1 - on failure

    Raises:
        CommandError
    """
    config = kwargs["config"]
    nodes = config.get("node")
    node = get_node_by_id(ceph_cluster, nodes)
    LOG.info(f"node: {node.shortname}")

    try:
        configure_firewalld(node)
        configure_snmptrapd(node, ceph_cluster, config)
    except BaseException as be:
        LOG.error(be)
        return 1

    return 0
def set_address(self, config):
    """
    Set the IP address of a node.

    - Attach the address to an existing node

    Args:
        config (Dict): set_address configuration

    Example::

        config:
            service: host
            command: set_address
            base_cmd_args:
                verbose: true
            args:
                node: node1
    """
    cmd = ["ceph", "orch"]
    if config.get("base_cmd_args"):
        cmd.append(config_dict_to_string(config["base_cmd_args"]))

    args = config["args"]
    node_name = args.pop("node")
    node = node_name
    if not isinstance(node, CephNode):
        node = get_node_by_id(self.cluster, node_name)

    if not node:
        raise ResourceNotFoundError("%s node not found/provided" % node_name)

    logger.info("Set address on this node : %s" % node.ip_address)
    cmd.extend(["host", "set-addr", node.shortname, node.ip_address])
    self.shell(args=cmd)
    assert node.ip_address in self.get_addr_by_name(node.shortname)
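# For reference, the argument list assembled above corresponds to the
# orchestrator CLI "ceph orch host set-addr <shortname> <ip>"; with a
# hypothetical node "ceph-node1" at 10.0.0.11, the shell() call runs
# "ceph orch host set-addr ceph-node1 10.0.0.11".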
def label_add(self, config):
    """
    Add/attach label(s) to node(s).

    - Attach labels to existing nodes
    - if nodes are empty, all cluster nodes are considered
    - roles defined on each node are used as labels (e.g., [mon, mgr])

    Args:
        config (Dict): label add configuration

    Example::

        config:
            service: host
            command: label_add
            base_cmd_args:
                verbose: true
            args:
                node: node1
                labels:
                    - mon
                    - osd
    """
    cmd = ["ceph", "orch"]
    if config.get("base_cmd_args"):
        cmd.append(config_dict_to_string(config["base_cmd_args"]))

    args = config["args"]
    node_name = args.pop("node")
    node = node_name
    if not isinstance(node, CephNode):
        node = get_node_by_id(self.cluster, node_name)

    if not node:
        raise ResourceNotFoundError("%s node not found/provided" % node_name)

    _labels = args.get("labels")
    if isinstance(_labels, str) and _labels == "apply-all-labels":
        _labels = node.role.role_list

    if not _labels:
        raise ResourceNotFoundError("labels not found/provided")

    logger.info("Add label(s) %s on node %s" % (_labels, node.ip_address))
    for label in _labels:
        _cmd = deepcopy(cmd)
        _cmd.extend(["host", "label", "add", node.hostname, label])
        self.shell(args=_cmd)
        assert label in self.fetch_labels_by_hostname(node.hostname)

        if config.get("validate_admin_keyring") and label == "_admin":
            logger.info("Ceph keyring - default: %s" % DEFAULT_KEYRING_PATH)
            if not monitoring_file_existence(node, DEFAULT_KEYRING_PATH):
                raise HostOpFailure("Ceph keyring not found")
            if not monitoring_file_existence(node, DEFAULT_CEPH_CONF_PATH):
                raise HostOpFailure("Ceph configuration file not found")
            logger.info("Ceph configuration and keyring found on admin node...")
def label_remove(self, config):
    """
    Remove label(s) from node(s).

    - remove labels from existing nodes
    - if nodes are empty, all cluster nodes are considered

    Args:
        config (Dict): label remove configuration

    Example::

        config:
            service: host
            command: label_remove
            base_cmd_args:
                verbose: true
            args:
                node: node1
                labels:
                    - mon
                    - osd
    """
    cmd = ["ceph", "orch"]
    if config.get("base_cmd_args"):
        cmd.append(config_dict_to_string(config["base_cmd_args"]))

    args = config["args"]
    node_name = args.pop("node")
    node = node_name
    if not isinstance(node, CephNode):
        node = get_node_by_id(self.cluster, node_name)

    if not node:
        raise ResourceNotFoundError("%s node not found/provided" % node_name)

    _labels = args.get("labels")
    if isinstance(_labels, str) and _labels == "apply-all-labels":
        _labels = node.role.role_list

    if not _labels:
        raise ResourceNotFoundError("labels not found/provided")

    logger.info("Remove label(s) %s on node %s" % (_labels, node.ip_address))
    for label in _labels:
        _cmd = deepcopy(cmd)
        _cmd.extend(["host", "label", "rm", node.shortname, label])
        self.shell(args=_cmd)
def label_add(self, config):
    """
    Add/attach label(s) to node(s).

    - Attach labels to existing nodes
    - if nodes are empty, all cluster nodes are considered
    - roles defined on each node are used as labels (e.g., [mon, mgr])

    Args:
        config (Dict): label add configuration

    Example::

        config:
            service: host
            command: label_add
            base_cmd_args:
                verbose: true
            args:
                node: node1
                labels:
                    - mon
                    - osd
    """
    cmd = ["ceph", "orch"]
    if config.get("base_cmd_args"):
        cmd.append(config_dict_to_string(config["base_cmd_args"]))

    args = config["args"]
    node_name = args.pop("node")
    node = node_name
    if not isinstance(node, CephNode):
        node = get_node_by_id(self.cluster, node_name)

    if not node:
        raise ResourceNotFoundError("%s node not found/provided" % node_name)

    _labels = args.get("labels")
    if isinstance(_labels, str) and _labels == "apply-all-labels":
        _labels = node.role.role_list

    if not _labels:
        raise ResourceNotFoundError("labels not found/provided")

    logger.info("Add label(s) %s on node %s" % (_labels, node.ip_address))
    for label in _labels:
        _cmd = deepcopy(cmd)
        _cmd.extend(["host", "label", "add", node.shortname, label])
        self.shell(args=_cmd)
        assert label in self.fetch_labels_by_hostname(node.shortname)
def remove(cls, config: Dict) -> None:
    """
    Remove a client using the provided configuration.

    Args:
        cls: cephadm object
        config: Key/value pairs provided by the test case to remove the client.

    Example::

        config:
            command: remove
            id: client.0                # client id
            node: "node8"               # client node
            remove_packages:
                - ceph-common           # remove ceph-common packages
            remove_admin_keyring: true  # remove admin keyring from node
    """
    node = get_node_by_id(cls.cluster, config["node"])
    id_ = config["id"]

    cls.shell(args=["ceph", "auth", "del", id_])

    if config.get("remove_admin_keyring"):
        node.exec_command(
            cmd="rm -rf /etc/ceph/ceph.client.admin.keyring",
            sudo=True,
        )

    node.exec_command(
        sudo=True, cmd=f"rm -rf /etc/ceph/ceph.{id_}.keyring", check_ec=False
    )
    out, _ = node.exec_command(cmd="ls -ltrh /etc/ceph/", sudo=True)
    log.info(out.read().decode().strip())

    # Remove packages like ceph-common.
    # Be careful: this may remove the entire /etc/ceph directory.
    if config.get("remove_packages"):
        for pkg in config.get("remove_packages"):
            node.exec_command(
                cmd=f"yum remove -y {pkg}",
                sudo=True,
            )
def zap(self, config: Dict) -> None:
    """
    Zap a particular device.

    Args:
        config (Dict): Zap configs

    Returns:
        output (Str), error (Str) returned by the command.

    Example::

        command: zap
        base_cmd_args:
            verbose: true
        pos_args:
            - "node1"
            - "/dev/vdb"
        args:
            force: true
    """
    base_cmd = ["ceph", "orch"]

    if config.get("base_cmd_args"):
        base_cmd_args_str = config_dict_to_string(config.get("base_cmd_args"))
        base_cmd.append(base_cmd_args_str)
    base_cmd.extend(["device", "zap"])

    pos_args = config["pos_args"]
    node = pos_args[0]
    host_id = get_node_by_id(self.cluster, node)
    host = host_id.shortname
    assert host

    base_cmd.append(host)
    base_cmd.extend(pos_args[1:])

    if config and config.get("args"):
        args = config.get("args")
        base_cmd.append(config_dict_to_string(args))

    return self.shell(args=base_cmd)
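# For reference, with the example config above and a hypothetical host short
# name "ceph-node1", the assembled command is expected to resolve to something
# like "ceph orch --verbose device zap ceph-node1 /dev/vdb --force" (the exact
# flag rendering depends on config_dict_to_string).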
def add(self: DaemonProtocol, config: Dict):
    """
    Execute the add method using the object's service name.

    Args:
        config (Dict): Key/value pairs passed from the test suite.

    Example::

        config:
            service: osd
            command: add
            base_cmd_args:
                verbose: true
            pos_args:
                - node1
                - /dev/vdb
    """
    service = config.pop("service")
    base_cmd = ["ceph", "orch"]
    base_cmd.extend(["daemon", "add", service])

    if config.get("base_cmd_args"):
        base_cmd.append(config_dict_to_string(config["base_cmd_args"]))

    pos_args = config["pos_args"]
    node = pos_args[0]
    host_id = get_node_by_id(self.cluster, node)
    host = host_id.shortname

    if service == "osd":
        base_cmd.extend([f"{host}:{','.join(pos_args[1:])}"])
    else:
        if pos_args:
            base_cmd += pos_args
        base_cmd.append(host)

    out, _ = self.shell(base_cmd)
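# For reference, with the example config above and a hypothetical host short
# name "ceph-node1", the OSD branch builds
# "ceph orch daemon add osd ceph-node1:/dev/vdb", matching the cephadm syntax
# for creating an OSD on a specific device of a host.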
def remove(self, config):
    """
    Remove a host from the cluster.

    Args:
        config (Dict): Remove host configuration

    Example::

        config:
            service: host
            command: remove
            base_cmd_args:          # arguments to ceph orch
                verbose: true
            args:
                node: "node2"       # node-name or object
    """
    cmd = ["ceph", "orch"]
    if config.get("base_cmd_args"):
        cmd.append(config_dict_to_string(config["base_cmd_args"]))

    args = config["args"]
    node_name = args.pop("node")
    node = node_name
    if not isinstance(node, CephNode):
        node = get_node_by_id(self.cluster, node_name)

    if not node:
        raise ResourceNotFoundError("%s node not found/provided" % node_name)

    # Skip removal of the installer node and of hosts that are not part of the cluster
    if (
        node.hostname == self.installer.node.hostname
        or node.hostname not in self.fetch_host_names()
    ):
        return

    logger.info("Removing node %s" % node.ip_address)
    cmd.extend(["host", "rm", node.hostname])
    self.shell(args=cmd)
    assert node.hostname not in self.fetch_host_names()
def generate_snmp_dest_conf(self, spec):
    """
    Return conf content for the snmp-gateway service.

    Args:
        spec (Dict): snmp-gateway service config

    Returns:
        destination_conf (Str)

    Example::

        spec:
          - service_type: snmp-gateway
            service_name: snmp-gateway
            placement:
              count: 1
            spec:
              credentials:
                snmp_v3_auth_username: <user_name>
                snmp_v3_auth_password: <password>
              port: 9464
              snmp_destination: node
              snmp_version: V3
    """
    template = self._get_template("snmp_destination")
    node = get_node_by_id(self.cluster, "node1")

    cmd = "cephadm shell ceph fsid"
    out, err = node.exec_command(sudo=True, cmd=cmd)
    LOG.info(f"fsid: {out}")

    # Use the first 32 hex characters of the fsid (without hyphens) as the engine id
    fsid = out.strip().replace("-", "")
    engine_id = fsid[0:32]
    if engine_id:
        spec["spec"]["engine_id"] = engine_id
        LOG.info(f"engine_id: {engine_id}")

    return template.render(spec=spec)
def translate_to_hostname(cluster, string: str) -> str:
    """
    Return the string with node IDs replaced with shortnames.

    In this method, the pattern {node:node1} would be replaced with the value
    of node.shortname.

    Args:
        cluster: Ceph cluster instance
        string: String to be searched for the node ID pattern

    Return:
        String whose node IDs are replaced with shortnames
    """
    replaced_string = string
    node_list = re.findall("{node:(.+?)}", string)

    for node in node_list:
        node_name = get_node_by_id(cluster, node).shortname
        replacement_pattern = "{node:" + node + "}"
        replaced_string = re.sub(replacement_pattern, node_name, replaced_string)

    return replaced_string
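# Usage illustration (hypothetical node id and shortname): if "node1" resolves
# to a host whose shortname is "ceph-node1-abc", then
#   translate_to_hostname(cluster, "place the daemon on {node:node1}")
# returns "place the daemon on ceph-node1-abc".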
def add(cls, config: Dict) -> None:
    """Configure a client using the provided configuration.

    Args:
        cls: cephadm object
        config: Key/value pairs provided by the test case to create the client.

    Example::

        config:
            command: add
            id: client.1                    # client id
            node: "node8"                   # client node
            copy_ceph_conf: true|false      # copy ceph.conf to provided node
            store-keyring: true             # store keyring locally under /etc/ceph
            install_packages:
                - ceph-common               # install ceph-common packages
            copy_admin_keyring: true|false  # copy admin keyring
            caps:                           # authorize client capabilities
                mon: "allow r"
                osd: "allow rw pool=liverpool"
    """
    id_ = config["id"]
    client_file = f"/etc/ceph/ceph.{id_}.keyring"

    # Create client
    cmd = ["ceph", "auth", "get-or-create", f"{id_}"]
    for k, v in config.get("caps", {}).items():
        cmd.append(f"{k} '{v}'")

    cnt_key, err = cls.shell(args=cmd)

    def put_file(client, file_name, content, file_mode, sudo=True):
        file_ = client.remote_file(sudo=sudo, file_name=file_name, file_mode=file_mode)
        file_.write(content)
        file_.flush()

    if config.get("node"):
        node = get_node_by_id(cls.cluster, config["node"])

        # Copy the keyring to the client node
        node.exec_command(sudo=True, cmd="mkdir -p /etc/ceph")
        put_file(node, client_file, cnt_key, "w")

        if config.get("copy_ceph_conf", True):
            # Get a minimal ceph.conf
            ceph_conf, err = cls.shell(
                args=["ceph", "config", "generate-minimal-conf"]
            )
            # Copy the ceph.conf to the client node
            put_file(node, "/etc/ceph/ceph.conf", ceph_conf, "w")

        # Copy the admin keyring to the client node
        if config.get("copy_admin_keyring"):
            admin_keyring, _ = cls.shell(args=["ceph", "auth", "get", "client.admin"])
            put_file(node, "/etc/ceph/ceph.client.admin.keyring", admin_keyring, "w")

        # Install packages like ceph-common
        if config.get("install_packages"):
            for pkg in config.get("install_packages"):
                node.exec_command(cmd=f"yum install -y --nogpgcheck {pkg}", sudo=True)

        out, _ = node.exec_command(cmd="ls -ltrh /etc/ceph/", sudo=True)
        log.info(out.read().decode().strip())

    # Hold a local copy of the client keyring on the installer node
    if config.get("store-keyring"):
        put_file(cls.installer, client_file, cnt_key, "w")
def run(ceph_cluster, **kw):
    LOG.info("Running test")
    ceph_nodes = kw.get("ceph_nodes")
    LOG.info("Running ceph ansible test")
    config = kw.get("config")
    test_data = kw.get("test_data")
    prev_install_version = test_data["install_version"]
    skip_version_compare = config.get("skip_version_compare")
    limit_node = config.get("limit")
    containerized = config.get("ansi_config").get("containerized_deployment")
    build = config.get("build", config.get("rhbuild"))
    LOG.info("Build for upgrade: {build}".format(build=build))
    cluster_name = config.get("ansi_config").get("cluster")

    ubuntu_repo = config.get("ubuntu_repo")
    hotfix_repo = config.get("hotfix_repo")
    cloud_type = config.get("cloud-type", "openstack")
    base_url = config.get("base_url")
    installer_url = config.get("installer_url")
    config["ansi_config"]["public_network"] = get_public_network(ceph_nodes[0])

    ceph_cluster.ansible_config = config["ansi_config"]
    ceph_cluster.custom_config = test_data.get("custom-config")
    ceph_cluster.custom_config_file = test_data.get("custom-config-file")
    ceph_cluster.use_cdn = config.get("use_cdn")

    config["ansi_config"].update(
        set_container_info(ceph_cluster, config, ceph_cluster.use_cdn, containerized)
    )

    # Translate the RGW pull host to an IP address for multisite
    rgw_pull_host = config["ansi_config"].get("rgw_pullhost")
    if rgw_pull_host:
        ceph_cluster.ansible_config["rgw_pullhost"] = translate_to_ip(
            kw["ceph_cluster_dict"], ceph_cluster.name, rgw_pull_host
        )

    ceph_installer = ceph_cluster.get_ceph_object("installer")
    ansible_dir = "/usr/share/ceph-ansible"

    if config.get("skip_setup") is True:
        LOG.info("Skipping setup of ceph cluster")
        return 0

    # set pre-upgrade install version
    test_data["install_version"] = build
    LOG.info("Previous install version: {}".format(prev_install_version))

    # retrieve pre-upgrade versions and initialize container counts
    pre_upgrade_versions = get_ceph_versions(ceph_cluster.get_nodes(), containerized)
    pre_upgrade_container_counts = {}

    # setup packages based on build
    ceph_cluster.setup_packages(
        base_url, hotfix_repo, installer_url, ubuntu_repo, build, cloud_type
    )

    # backup existing hosts file and ansible config
    ceph_installer.exec_command(cmd="cp {}/hosts /tmp/hosts".format(ansible_dir))
    ceph_installer.exec_command(
        cmd="cp {}/group_vars/all.yml /tmp/all.yml".format(ansible_dir)
    )

    # update ceph-ansible
    ceph_installer.install_ceph_ansible(build, upgrade=True)

    # restore hosts file
    ceph_installer.exec_command(
        sudo=True, cmd="cp /tmp/hosts {}/hosts".format(ansible_dir)
    )

    # If upgrading from version 2, update the hosts file with mgrs
    if prev_install_version.startswith("2") and build.startswith("3"):
        collocate_mons_with_mgrs(ceph_cluster, ansible_dir)

    # configure fetch directory path
    if config.get("ansi_config").get("fetch_directory") is None:
        config["ansi_config"]["fetch_directory"] = "~/fetch/"

    # set the docker image tag if necessary
    if containerized and config.get("ansi_config").get("docker-insecure-registry"):
        config["ansi_config"]["ceph_docker_image_tag"] = get_latest_container_image_tag(
            build
        )
    LOG.info("gvar: {}".format(config.get("ansi_config")))
    gvar = yaml.dump(config.get("ansi_config"), default_flow_style=False)

    # create all.yml
    LOG.info("global vars {}".format(gvar))
    gvars_file = ceph_installer.remote_file(
        sudo=True, file_name="{}/group_vars/all.yml".format(ansible_dir), file_mode="w"
    )
    gvars_file.write(gvar)
    gvars_file.flush()

    # retrieve container count if containerized
    if containerized:
        pre_upgrade_container_counts = get_container_counts(ceph_cluster)

    # configure insecure registry if necessary
    if config.get("docker-insecure-registry"):
        ceph_cluster.setup_insecure_registry()

    # copy rolling update from infrastructure playbook
    jewel_minor_update = build.startswith("2")
    if build.startswith("4") or build.startswith("5"):
        cmd = (
            "cd {};"
            "ANSIBLE_STDOUT_CALLBACK=debug;"
            "ansible-playbook -e ireallymeanit=yes -vvvv -i "
            "hosts infrastructure-playbooks/rolling_update.yml".format(ansible_dir)
        )
    else:
        ceph_installer.exec_command(
            sudo=True,
            cmd="cd {} ; cp infrastructure-playbooks/rolling_update.yml .".format(
                ansible_dir
            ),
        )
        cmd = (
            "cd {};"
            "ANSIBLE_STDOUT_CALLBACK=debug;"
            "ansible-playbook -e ireallymeanit=yes -vvvv -i hosts rolling_update.yml".format(
                ansible_dir
            )
        )

    if jewel_minor_update:
        cmd += " -e jewel_minor_update=true"
        LOG.info("Upgrade is jewel_minor_update, cmd: {cmd}".format(cmd=cmd))

    if config.get("ansi_cli_args"):
        cmd += config_dict_to_string(config["ansi_cli_args"])

    short_names = []
    if limit_node:
        for node in limit_node:
            short_name = get_node_by_id(ceph_cluster, node).shortname
            short_names.append(short_name)
        matched_short_names = ",".join(short_names)
        cmd += f" --limit {matched_short_names}"

    out, rc = ceph_installer.exec_command(cmd=cmd, long_running=True)

    if rc != 0:
        LOG.error("Failed during upgrade (rc = {})".format(rc))
        return rc

    # set build to new version
    LOG.info("Setting install_version to {build}".format(build=build))
    test_data["install_version"] = build
    ceph_cluster.rhcs_version = build

    # check if all mons and OSDs are in the correct state
    num_osds = ceph_cluster.ceph_demon_stat["osd"]
    num_mons = ceph_cluster.ceph_demon_stat["mon"]
    test_data["ceph-ansible"] = {
        "num-osds": num_osds,
        "num-mons": num_mons,
        "rhbuild": build,
    }

    # compare pre and post upgrade versions
    if skip_version_compare:
        LOG.warning("Skipping version comparison.")
    else:
        if not jewel_minor_update:
            post_upgrade_versions = get_ceph_versions(ceph_nodes, containerized)
            version_compare_fail = compare_ceph_versions(
                pre_upgrade_versions, post_upgrade_versions
            )
            if version_compare_fail:
                return version_compare_fail

    # compare pre and post upgrade container counts
    if containerized:
        post_upgrade_container_counts = get_container_counts(ceph_cluster)
        container_count_fail = compare_container_counts(
            pre_upgrade_container_counts,
            post_upgrade_container_counts,
            prev_install_version,
        )
        if container_count_fail:
            return container_count_fail

    client = ceph_cluster.get_ceph_object("mon")

    if build.startswith("5"):
        cmd = (
            "cd {};"
            "ANSIBLE_STDOUT_CALLBACK=debug;"
            "ansible-playbook -e ireallymeanit=yes -vvvv -i "
            "hosts infrastructure-playbooks/cephadm-adopt.yml".format(ansible_dir)
        )
        out, rc = ceph_installer.exec_command(cmd=cmd, long_running=True)

        if rc != 0:
            LOG.error("Failed during cephadm adopt (rc = {})".format(rc))
            return rc

        client = ceph_cluster.get_nodes("mon")[0]

    return ceph_cluster.check_health(
        build,
        cluster_name=cluster_name,
        client=client,
        timeout=config.get("timeout", 300),
    )
def add(self, config):
    """
    Add a host to the cluster.

    Args:
        config (Dict): host addition configuration

    Example::

        config:
            service: host
            command: add
            base_cmd_args:              # arguments to ceph orch
                concise: true
                block: true
            args:
                node: "node2"           # node-name or object
                attach_ip_address: bool # true or false
                labels: [mon, osd] or apply-all-labels

    attach_address: the host is added with its IP address.
    add_label: the host is added with labels (the roles assigned to the node
    are considered). Labels are used as provided when a list of strings is
    given; if the string "apply-all-labels" is given, all roles associated
    with the node are used as labels.
    """
    cmd = ["ceph", "orch"]
    if config.get("base_cmd_args"):
        cmd.append(config_dict_to_string(config["base_cmd_args"]))

    args = config["args"]
    node = args.pop("node")
    ceph_node = get_node_by_id(self.cluster, node_name=node)
    if not ceph_node:
        raise ResourceNotFoundError(f"No matching resource found: {node}")

    # Skip the client node if "client" is the only label attached to it
    if (
        len(ceph_node.role.role_list) == 1
        and ["client"] == ceph_node.role.role_list
    ):
        return

    attach_address = args.get("attach_ip_address")
    _labels = args.get("labels")
    if isinstance(_labels, str) and _labels == "apply-all-labels":
        label_set = set(ceph_node.role.role_list)
        _labels = list(label_set)

    cmd.extend(["host", "add", ceph_node.hostname])

    if attach_address:
        cmd.append(ceph_node.ip_address)

    if _labels:
        # To fill the mandatory <address> argument in case attach_address is False
        if not attach_address:
            cmd.append("''")

        cmd += _labels

    logger.info(
        "Adding node %s, (attach_address: %s, labels: %s)"
        % (ceph_node.ip_address, attach_address, _labels)
    )

    # Add host
    self.shell(args=cmd)

    # validate host existence
    if ceph_node.hostname not in self.fetch_host_names():
        raise HostOpFailure(f"Hostname verify failure. Expected {ceph_node.hostname}")

    if attach_address:
        if ceph_node.ip_address != self.get_addr_by_name(ceph_node.hostname):
            raise HostOpFailure(
                f"IP address verify failed. Expected {ceph_node.ip_address}"
            )

    if _labels:
        assert sorted(self.fetch_labels_by_hostname(ceph_node.hostname)) == sorted(
            _labels
        )

        if config.get("validate_admin_keyring") and "_admin" in _labels:
            if not monitoring_file_existence(ceph_node, DEFAULT_KEYRING_PATH):
                raise HostOpFailure("Ceph keyring not found")
            if not monitoring_file_existence(ceph_node, DEFAULT_CEPH_CONF_PATH):
                raise HostOpFailure("Ceph configuration file not found")
            logger.info("Ceph configuration and Keyring found")
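# For reference, adding a node with attach_ip_address set and labels [mon, osd]
# issues a command of the form "ceph orch host add <hostname> <ip> mon osd";
# when attach_ip_address is false, "''" is inserted to satisfy the positional
# <address> argument before the labels.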
def run(ceph_cluster, **kw): """ enables connectivity mode and deploys stretch cluster with arbiter mon node Args: ceph_cluster (ceph.ceph.Ceph): ceph cluster """ log.info("Deploying stretch cluster with arbiter mon node") log.info(run.__doc__) config = kw.get("config") cephadm = CephAdmin(cluster=ceph_cluster, **config) rados_obj = RadosOrchestrator(node=cephadm) mon_obj = MonElectionStrategies(rados_obj=rados_obj) client_node = ceph_cluster.get_nodes(role="client")[0] site1_name = config["site1"]["name"] site2_name = config["site2"]["name"] # disabling automatic crush update cmd = "ceph config set osd osd_crush_update_on_start false" cephadm.shell([cmd]) # Sleeping for 2 seconds after map update. time.sleep(2) # Setting the election strategy to connectivity mode if not mon_obj.set_election_strategy(mode="connectivity"): log.error("could not set election strategy to connectivity mode") return 1 # Sleeping for 2 seconds after strategy update. time.sleep(2) # Checking updated election strategy in mon map strategy = mon_obj.get_election_strategy() if strategy != 3: log.error( f"cluster created election strategy other than connectivity, i.e {strategy}" ) return 1 log.info("Enabled connectivity mode on the cluster") # Creating new datacenter crush objects and moving under root/default for name in [site1_name, site2_name]: cmd = f"ceph osd crush add-bucket {name} datacenter" rados_obj.run_ceph_command(cmd) time.sleep(2) move_crush_item(cephadm, crush_obj=name, name="root", value="default") time.sleep(2) # Moving all the OSD and Mon daemons into respective sites sites = ["site1", "site2", "site3"] for site in sites: mon_hosts = [ host_obj.hostname for host_obj in ceph_cluster.get_nodes(role="mon") ] log.info(f"Mon hosts defined: {mon_hosts}") osd_hosts = [ host_obj.hostname for host_obj in ceph_cluster.get_nodes(role="osd") ] log.info(f"OSD hosts defined: {osd_hosts}") # Collecting hosts from each site and setting locations accordingly site_details = config[site] crush_name = site_details["name"] host_nodes = cephadm.cluster.get_nodes() for item in site_details["hosts"]: host = [ node for node in host_nodes if re.search(item, node.hostname) ][0] # Moving the mon daemons into site if host.hostname in mon_hosts: cmd = f"ceph mon set_location {host.hostname} datacenter={crush_name}" cephadm.shell([cmd]) log.info( f"Set location for mon {host.hostname} onto site {crush_name}\n" "sleeping for 5 seconds") time.sleep(5) # Moving the osd daemons into site if host.hostname in osd_hosts: move_crush_item( node=cephadm, crush_obj=host.hostname, name="datacenter", value=crush_name, ) log.info( f"Set location for OSD {host.hostname} onto site {crush_name}\n" "sleeping for 5 seconds") time.sleep(5) log.info("Moved all the hosts into respective sites") stretch_rule_name = config.get("stretch_rule_name", "stretch_rule") if not setup_crush_rule( node=client_node, rule_name=stretch_rule_name, site1=site1_name, site2=site2_name, ): log.error("Failed to Add crush rules in the crush map") return 1 # Sleeping for 5 sec for the strategy to be active time.sleep(5) # Enabling the stretch cluster mode tiebreaker_node = get_node_by_id(cephadm.cluster, config["site3"]["hosts"][0]) log.info(f"tiebreaker node provided: {tiebreaker_node.hostname}") cmd = f"ceph mon enable_stretch_mode {tiebreaker_node.hostname} {stretch_rule_name} datacenter" try: cephadm.shell([cmd]) except Exception as err: log.error( f"Error while enabling stretch rule on the datacenter. 
Command : {cmd}" ) log.error(err) return 1 time.sleep(2) # wait for PG's to settle down with new crush rules after deployment of stretch mode wait_for_clean_pg_sets(rados_obj) # Checking if the pools have been updated with the new crush rules acting_set = rados_obj.get_pg_acting_set() if len(acting_set) != 4: log.error( f"There are {len(acting_set)} OSD's in PG. OSDs: {acting_set}. Stretch cluster requires 4" ) return 1 log.info(f"Acting set : {acting_set} Consists of 4 OSD's per PG") log.info("Stretch rule with arbiter monitor node set up successfully") return 0
def __maintenance(self: ServiceProtocol, config: Dict, op: str) -> None:
    """Perform host maintenance operations using the orchestrator.

    Args:
        config (Dict): Key/value pairs passed from the test suite.
            args - node: name of the node to be placed under maintenance
        op (Str): operation to be performed, enter/exit

    Example::

        config:
            service: host
            command: enter
            verify: true
            args:
                node: name of the node to be placed under maintenance
    """
    cmd = ["ceph", "orch"]
    if config.get("base_cmd_args"):
        cmd.append(config_dict_to_string(config["base_cmd_args"]))

    cmd.append("host")
    cmd.append("maintenance")
    cmd.append(op)

    verify = config.pop("verify", True)
    args = config["args"]
    nodename = args.pop("node")
    if not nodename:
        raise HostMaintenanceFailure(
            "Node on which maintenance mode is to be configured is not provided"
        )

    node = get_node_by_id(self.cluster, nodename)
    if not node:
        raise ResourceNotFoundError(f"No matching resource found: {nodename}")

    if not self.get_host(node.hostname):
        raise HostMaintenanceFailure(
            "The node specified for maintenance is not deployed in the cluster"
        )

    cmd.append(node.hostname)

    if op == "enter":
        # Fail the active mgr over to another node before placing this host
        # under maintenance, then force the operation.
        manager = Manager(cluster=self.cluster, **config)
        if not manager.switch_active(node):
            raise HostMaintenanceFailure(
                "Unable to switch active mgr to a node other than the input node"
            )
        cmd.append("--force")

    self.shell(args=cmd)

    if verify and not self.check_maintenance_status(op, node):
        raise HostMaintenanceFailure(
            f"The host maintenance operation {op} was not successful on the host {node.hostname}"
        )
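# For reference, the assembled command takes the form
# "ceph orch host maintenance enter <hostname> --force" or
# "ceph orch host maintenance exit <hostname>"; --force is appended only for
# "enter", after the active mgr has been failed over to another host.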