def add_interface(self, node_name, network_name, target_interface):
    """
    Create an interface attached to the given network and return the new
    interface's MAC address.
    Note: Do not use the same network for different tests
    """
    logging.info(
        f"Creating new interface attached to network: {network_name}, for node: {node_name}"
    )
    net_leases = self.list_leases(network_name)
    mac_addresses = []
    for lease in net_leases:
        mac_addresses.append(lease['mac'])

    command = f"virsh attach-interface {node_name} network {network_name} --target {target_interface} --persistent"
    utils.run_command(command)

    try:
        waiting.wait(
            lambda: len(self.list_leases(network_name)) > len(mac_addresses),
            timeout_seconds=30,
            sleep_seconds=2,
            waiting_for="Wait for network lease",
        )
    except waiting.exceptions.TimeoutExpired:
        logging.error("Network lease wasn't found for the added interface")
        raise

    mac_address = ""
    new_net_leases = self.list_leases(network_name)
    for lease in new_net_leases:
        if lease['mac'] not in mac_addresses:
            mac_address = lease['mac']
            break

    logging.info(
        f"Successfully attached interface, network: {network_name}, mac: {mac_address}, for node:"
        f" {node_name}"
    )
    return mac_address
def download_logs(client: InventoryClient, cluster: dict, dest: str, must_gather: bool,
                  retry_interval: int = RETRY_INTERVAL):
    output_folder = get_logs_output_folder(dest, cluster)

    if os.path.isdir(output_folder):
        log.info(f"Skipping. The logs directory {output_folder} already exists.")
        return

    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))

    try:
        write_metadata_file(client, cluster, os.path.join(output_folder, 'metadata.json'))

        with suppress(assisted_service_client.rest.ApiException):
            client.download_ignition_files(cluster['id'], os.path.join(output_folder, "cluster_files"))

        for host_id in map(lambda host: host['id'], cluster['hosts']):
            with suppress(assisted_service_client.rest.ApiException):
                client.download_host_ignition(cluster['id'], host_id,
                                              os.path.join(output_folder, "cluster_files"))

        with suppress(assisted_service_client.rest.ApiException):
            client.download_cluster_events(cluster['id'],
                                           os.path.join(output_folder, f"cluster_{cluster['id']}_events.json"))
            shutil.copy2(os.path.join(os.path.dirname(os.path.realpath(__file__)), "events.html"), output_folder)

        with suppress(assisted_service_client.rest.ApiException):
            for i in range(MAX_RETRIES):
                cluster_logs_tar = os.path.join(output_folder, f"cluster_{cluster['id']}_logs.tar")

                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)

                client.download_cluster_logs(cluster['id'], cluster_logs_tar)
                min_number_of_logs = (len(cluster['hosts']) + 1
                                      if cluster['status'] == ClusterStatus.INSTALLED
                                      else len(cluster['hosts']))

                try:
                    verify_logs_uploaded(cluster_logs_tar, min_number_of_logs,
                                         cluster['status'] == ClusterStatus.INSTALLED)
                    break
                except AssertionError as ex:
                    log.warning(f"Cluster logs verification failed: {ex}")

                    # Skip sleeping on the last retry
                    if i < MAX_RETRIES - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)

        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

        with suppress(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)

        if must_gather:
            recreate_folder(os.path.join(output_folder, "must-gather"))
            config_etc_hosts(cluster['name'], cluster['base_dns_domain'],
                             helper_cluster.get_api_vip_from_cluster(client, cluster))
            download_must_gather(kubeconfig_path, os.path.join(output_folder, "must-gather"))
    finally:
        run_command(f"chmod -R ugo+rx '{output_folder}'")
def _create_user_file_for_auth(self):
    if self.authenticated:
        create_user_file_cmd = (
            f"htpasswd -b -c {self.config_dir_path}/squid-users {self.PROXY_USER} {self.PROXY_USER_PASS}"
        )
        utils.run_command(create_user_file_cmd, shell=True)
        self.user_file_path = f"{self.config_dir_path}/squid-users"
def add_disk_bootflag(disk_path):
    command = f'virt-format -a {disk_path} --partition=mbr'
    utils.run_command(command, shell=True, env={**os.environ, "LIBGUESTFS_BACKEND": "direct"})
def download_must_gather(kubeconfig: str, dest_dir: str):
    log.info(f"Downloading must-gather to {dest_dir}")
    command = f"oc --insecure-skip-tls-verify --kubeconfig={kubeconfig} adm must-gather" \
              f" --dest-dir {dest_dir} > {dest_dir}/must-gather.log"
    try:
        run_command(command, shell=True, raise_errors=True)
    except RuntimeError as ex:
        log.warning(f"Failed to run must-gather: {ex}")
def download_live_image(download_path, rhcos_version=None):
    if os.path.exists(download_path):
        logging.info("Image %s already exists, skipping download", download_path)
        return

    logging.info("Downloading iso to %s", download_path)
    rhcos_version = rhcos_version or os.getenv('RHCOS_VERSION', "46.82.202009222340-0")
    utils.run_command(f"curl https://releases-art-rhcos.svc.ci.openshift.org/art/storage/releases/rhcos-4.6/"
                      f"{rhcos_version}/x86_64/rhcos-{rhcos_version}-live.x86_64.iso --retry 5 -o {download_path}")
def download_live_image(download_path):
    if os.path.exists(download_path):
        logging.info("Image %s already exists, skipping download", download_path)
        return

    logging.info("Downloading iso to %s", download_path)
    # TODO: enable fetching the appropriate rhcos image
    utils.run_command(
        f"curl https://mirror.openshift.com/pub/openshift-v4/dependencies/rhcos/pre-release/"
        f"4.7.0-rc.2/rhcos-4.7.0-rc.2-x86_64-live.x86_64.iso --retry 5 -o {download_path}")
def download_live_image(download_path):
    if os.path.exists(download_path):
        logging.info("Image %s already exists, skipping download", download_path)
        return

    logging.info("Downloading iso to %s", download_path)
    # TODO: enable fetching the appropriate rhcos image
    utils.run_command(
        f"curl https://mirror.openshift.com/pub/openshift-v4/dependencies/rhcos/4.8/4.8.2/rhcos-live.x86_64.iso"
        f" --retry 10 --retry-connrefused -o {download_path} --continue-at -")
def get_nodes_infraenv(self) -> Callable[[BaseTerraformConfig, InfraEnvConfig], Nodes]:
    """ Currently supports only a single instance of nodes """
    nodes_data = dict()

    @JunitTestCase()
    def get_nodes_func(tf_config: BaseTerraformConfig, infraenv_config: InfraEnvConfig):
        if "nodes" in nodes_data:
            return nodes_data["nodes"]

        nodes_data["configs"] = infraenv_config, tf_config

        net_asset = LibvirtNetworkAssets()
        tf_config.net_asset = net_asset.get()
        nodes_data["net_asset"] = net_asset

        controller = TerraformController(tf_config, entity_config=infraenv_config)
        nodes = Nodes(controller)
        nodes_data["nodes"] = nodes

        nodes.prepare_nodes()

        interfaces = BaseTest.nat_interfaces(tf_config)
        nat = NatController(interfaces, NatController.get_namespace_index(interfaces[0]))
        nat.add_nat_rules()
        nodes_data["nat"] = nat

        return nodes

    yield get_nodes_func

    _nodes: Nodes = nodes_data.get("nodes")
    _infraenv_config, _tf_config = nodes_data.get("configs")
    _nat: NatController = nodes_data.get("nat")
    _net_asset: LibvirtNetworkAssets = nodes_data.get("net_asset")

    try:
        if _nodes and global_variables.test_teardown:
            logging.info('--- TEARDOWN --- node controller\n')
            _nodes.destroy_all_nodes()
            logging.info(f'--- TEARDOWN --- deleting iso file from: {_infraenv_config.iso_download_path}\n')
            infra_utils.run_command(f"rm -f {_infraenv_config.iso_download_path}", shell=True)
            self.teardown_nat(_nat)
    finally:
        if _net_asset:
            _net_asset.release_all()
def format_disk(disk_path):
    logging.info("Formatting disk %s", disk_path)
    if not os.path.exists(disk_path):
        logging.info("Path to disk %s does not exist. Skipping", disk_path)
        return

    command = f"qemu-img info {disk_path} | grep 'virtual size'"
    output = utils.run_command(command, shell=True)
    image_size = output[0].split(' ')[2]
    command = f'qemu-img create -f qcow2 {disk_path} {image_size}'
    utils.run_command(command, shell=True)
def _collect_virsh_logs(cls, nodes: Nodes, log_dir_name):
    logging.info('Collecting virsh logs\n')
    os.makedirs(log_dir_name, exist_ok=True)
    virsh_log_path = os.path.join(log_dir_name, "libvirt_logs")
    os.makedirs(virsh_log_path, exist_ok=False)

    libvirt_list_path = os.path.join(virsh_log_path, "virsh_list")
    infra_utils.run_command(f"virsh list --all >> {libvirt_list_path}", shell=True)

    libvirt_net_list_path = os.path.join(virsh_log_path, "virsh_net_list")
    infra_utils.run_command(f"virsh net-list --all >> {libvirt_net_list_path}", shell=True)

    network_name = nodes.get_cluster_network()
    virsh_leases_path = os.path.join(virsh_log_path, "net_dhcp_leases")
    infra_utils.run_command(f"virsh net-dhcp-leases {network_name} >> {virsh_leases_path}", shell=True)

    messages_log_path = os.path.join(virsh_log_path, "messages.log")
    shutil.copy('/var/log/messages', messages_log_path)

    qemu_libvirt_path = os.path.join(virsh_log_path, "qemu_libvirt_logs")
    os.makedirs(qemu_libvirt_path, exist_ok=False)
    for node in nodes:
        shutil.copy(f'/var/log/libvirt/qemu/{node.name}.log',
                    f'{qemu_libvirt_path}/{node.name}-qemu.log')

    console_log_path = os.path.join(virsh_log_path, "console_logs")
    os.makedirs(console_log_path, exist_ok=False)
    for node in nodes:
        shutil.copy(f'/var/log/libvirt/qemu/{node.name}-console.log',
                    f'{console_log_path}/{node.name}-console.log')

    libvird_log_path = os.path.join(virsh_log_path, "libvirtd_journal")
    infra_utils.run_command(f"journalctl --since \"{nodes.setup_time}\" "
                            f"-u libvirtd -D /run/log/journal >> {libvird_log_path}",
                            shell=True)
def prepare_nodes(self, nodes: Nodes, cluster_configuration: ClusterConfig) -> Nodes:
    try:
        nodes.prepare_nodes()
        yield nodes
    finally:
        if global_variables.test_teardown:
            logging.info('--- TEARDOWN --- node controller\n')
            nodes.destroy_all_nodes()
            logging.info(f'--- TEARDOWN --- deleting iso file from: {cluster_configuration.iso_download_path}\n')
            infra_utils.run_command(f"rm -f {cluster_configuration.iso_download_path}", shell=True)
def installer_gather(ip, ssh_key, out_dir):
    stdout, stderr, _ret = utils.run_command(
        f"{INSTALLER_BINARY} gather bootstrap --log-level debug --bootstrap {ip} --master {ip} --key {ssh_key}"
    )

    with open(INSTALLER_GATHER_DEBUG_STDOUT, "w") as f:
        f.write(stdout)

    with open(INSTALLER_GATHER_DEBUG_STDERR, "w") as f:
        f.write(stderr)

    matches = re.compile(r'.*logs captured here "(.*)".*').findall(stderr)

    if len(matches) == 0:
        logging.warning(f"It seems like installer-gather didn't generate any bundles, stderr: {stderr}")
        return

    bundle_file_path, *_ = matches
    logging.info(f"Found installer-gather bundle at path {bundle_file_path}")

    utils.run_command_with_output(f"tar -xzf {bundle_file_path} -C {out_dir}")
    if os.path.exists(bundle_file_path):
        os.remove(bundle_file_path)
def _does_rule_exist(self, rule_suffix):
    check_rule = self._build_rule_string('check', rule_suffix)
    _, _, exit_code = run_command(check_rule, shell=True, raise_errors=False)
    return exit_code == 0
def prepare_infraenv_nodes(self, infraenv_nodes: Nodes, infra_env_configuration: InfraEnvConfig) -> Nodes:
    try:
        infraenv_nodes.prepare_nodes()
        yield infraenv_nodes
    finally:
        if global_variables.test_teardown:
            logging.info("--- TEARDOWN --- node controller\n")
            infraenv_nodes.destroy_all_nodes()
            logging.info(f"--- TEARDOWN --- deleting iso file from: {infra_env_configuration.iso_download_path}\n")
            infra_utils.run_command(f"rm -f {infra_env_configuration.iso_download_path}", shell=True)
def _does_rule_exist(self) -> bool:
    check_rule = self._build_command_string(IpTableCommandOption.CHECK)
    _, _, exit_code = run_command(check_rule, shell=True, raise_errors=False)
    return exit_code == 0
def __init__(self, config: BaseNodeConfig, entity_config: Union[BaseClusterConfig, BaseInfraEnvConfig]):
    super().__init__(config, entity_config)
    self.libvirt_connection: libvirt.virConnect = libvirt.open('qemu:///system')
    self.private_ssh_key_path: Path = config.private_ssh_key_path
    self._setup_timestamp: str = utils.run_command("date +\"%Y-%m-%d %T\"")[0]
def _does_rule_exist(cls, rule_suffix):
    """ Check if rule exists """
    check_rule = cls._build_rule_string('check', rule_suffix)
    _, _, exit_code = run_command(check_rule, shell=True, raise_errors=False)
    return exit_code == 0
def download_logs(client: InventoryClient, cluster: dict, dest: str, must_gather: bool):
    output_folder = get_logs_output_folder(dest, cluster)

    if os.path.isdir(output_folder):
        log.info(f"Skipping. The logs directory {output_folder} already exists.")
        return

    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))
    write_metadata_file(client, cluster, os.path.join(output_folder, 'metadata.json'))

    with suppress(assisted_service_client.rest.ApiException):
        client.download_ignition_files(cluster['id'], os.path.join(output_folder, "cluster_files"))

    with suppress(assisted_service_client.rest.ApiException):
        client.download_cluster_events(cluster['id'],
                                       os.path.join(output_folder, f"cluster_{cluster['id']}_events.json"))
        shutil.copy2(os.path.join(os.path.dirname(os.path.realpath(__file__)), "events.html"), output_folder)

    with suppress(assisted_service_client.rest.ApiException):
        client.download_cluster_logs(cluster['id'],
                                     os.path.join(output_folder, f"cluster_{cluster['id']}_logs.tar"))

    kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

    with suppress(assisted_service_client.rest.ApiException):
        client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)

    if must_gather:
        recreate_folder(os.path.join(output_folder, "must-gather"))
        config_etc_hosts(cluster['name'], cluster['base_dns_domain'], cluster['api_vip'])
        download_must_gather(kubeconfig_path, os.path.join(output_folder, "must-gather"))

    run_command("chmod -R ugo+rx '%s'" % output_folder)
def _collect_virsh_logs(cls, nodes: Nodes, log_dir_name):
    logging.info("Collecting virsh logs\n")
    os.makedirs(log_dir_name, exist_ok=True)
    virsh_log_path = os.path.join(log_dir_name, "libvirt_logs")
    os.makedirs(virsh_log_path, exist_ok=False)

    libvirt_list_path = os.path.join(virsh_log_path, "virsh_list")
    infra_utils.run_command(f"virsh list --all >> {libvirt_list_path}", shell=True)

    libvirt_net_list_path = os.path.join(virsh_log_path, "virsh_net_list")
    infra_utils.run_command(f"virsh net-list --all >> {libvirt_net_list_path}", shell=True)

    network_name = nodes.get_cluster_network()
    virsh_leases_path = os.path.join(virsh_log_path, "net_dhcp_leases")
    infra_utils.run_command(f"virsh net-dhcp-leases {network_name} >> {virsh_leases_path}", shell=True)

    messages_log_path = os.path.join(virsh_log_path, "messages.log")
    try:
        shutil.copy("/var/log/messages", messages_log_path)
    except FileNotFoundError:
        logging.warning("Failed to copy /var/log/messages, file does not exist")

    qemu_libvirt_path = os.path.join(virsh_log_path, "qemu_libvirt_logs")
    os.makedirs(qemu_libvirt_path, exist_ok=False)
    for node in nodes:
        try:
            shutil.copy(f"/var/log/libvirt/qemu/{node.name}.log",
                        f"{qemu_libvirt_path}/{node.name}-qemu.log")
        except FileNotFoundError:
            logging.warning(f"Failed to copy {node.name} qemu log, file does not exist")

    console_log_path = os.path.join(virsh_log_path, "console_logs")
    os.makedirs(console_log_path, exist_ok=False)
    for node in nodes:
        try:
            shutil.copy(f"/var/log/libvirt/qemu/{node.name}-console.log",
                        f"{console_log_path}/{node.name}-console.log")
        except FileNotFoundError:
            logging.warning(f"Failed to copy {node.name} console log, file does not exist")

    libvird_log_path = os.path.join(virsh_log_path, "libvirtd_journal")
    infra_utils.run_command(
        f'journalctl --since "{nodes.setup_time}" '
        f"-u libvirtd -D /run/log/journal >> {libvird_log_path}",
        shell=True,
    )
def _does_rule_exist(cls, rule_suffix: str) -> bool:
    """ Check if rule exists """
    check_rule = cls._build_rule_string(IpTableCommandOption.CHECK, rule_suffix)
    _, _, exit_code = run_command(check_rule, shell=True, raise_errors=False)
    return exit_code == 0
def format_disk(cls, disk_path):
    logging.info("Formatting disk %s", disk_path)
    if not os.path.exists(disk_path):
        logging.info("Path to disk %s does not exist. Skipping", disk_path)
        return

    command = f"qemu-img info {disk_path} | grep 'virtual size'"
    output = utils.run_command(command, shell=True)
    image_size = output[0].split(' ')[2]

    # Fix for libvirt 6.0.0
    if image_size.isdigit():
        image_size += "G"

    cls.create_disk(disk_path, image_size)
def download_logs(client: InventoryClient, cluster: dict, dest: str, must_gather: bool,
                  update_by_events: bool = False, retry_interval: int = RETRY_INTERVAL, pull_secret=""):
    if "hosts" not in cluster or len(cluster["hosts"]) == 0:
        cluster["hosts"] = client.get_cluster_hosts(cluster_id=cluster["id"])

    output_folder = get_logs_output_folder(dest, cluster)
    if not is_update_needed(output_folder, update_by_events, client, cluster):
        log.info(f"Skipping, no need to update {output_folder}.")
        return

    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))

    try:
        write_metadata_file(client, cluster, os.path.join(output_folder, 'metadata.json'))

        with suppressAndLog(AssertionError, ConnectionError, requests.exceptions.ConnectionError):
            client.download_metrics(os.path.join(output_folder, "metrics.txt"))

        for cluster_file in ("bootstrap.ign", "master.ign", "worker.ign", "install-config.yaml"):
            with suppressAndLog(assisted_service_client.rest.ApiException):
                client.download_and_save_file(cluster['id'], cluster_file,
                                              os.path.join(output_folder, "cluster_files", cluster_file))

        for host_id in map(lambda host: host['id'], cluster['hosts']):
            with suppressAndLog(assisted_service_client.rest.ApiException):
                client.download_host_ignition(cluster['id'], host_id,
                                              os.path.join(output_folder, "cluster_files"))

        with suppressAndLog(assisted_service_client.rest.ApiException):
            client.download_cluster_events(cluster['id'], get_cluster_events_path(cluster, output_folder))
            shutil.copy2(os.path.join(os.path.dirname(os.path.realpath(__file__)), "events.html"), output_folder)

        with suppressAndLog(assisted_service_client.rest.ApiException):
            are_masters_in_configuring_state = are_host_progress_in_stage(
                cluster['hosts'], [HostsProgressStages.CONFIGURING], 2)
            are_masters_in_join_state = are_host_progress_in_stage(
                cluster['hosts'], [HostsProgressStages.JOINED], 2)
            max_retries = MUST_GATHER_MAX_RETRIES if are_masters_in_join_state else MAX_RETRIES
            is_controller_expected = (cluster['status'] == ClusterStatus.INSTALLED
                                      or are_masters_in_configuring_state)
            min_number_of_logs = min_number_of_log_files(cluster, is_controller_expected)

            for i in range(max_retries):
                cluster_logs_tar = os.path.join(output_folder, f"cluster_{cluster['id']}_logs.tar")

                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)

                client.download_cluster_logs(cluster['id'], cluster_logs_tar)
                try:
                    verify_logs_uploaded(cluster_logs_tar, min_number_of_logs,
                                         installation_success=(cluster['status'] == ClusterStatus.INSTALLED),
                                         check_oc=are_masters_in_join_state)
                    break
                except AssertionError as ex:
                    log.warning(f"Cluster logs verification failed: {ex}")

                    # Skip sleeping on the last retry
                    if i < max_retries - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)

        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

        with suppressAndLog(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster['id'], kubeconfig_path)

        if must_gather:
            recreate_folder(os.path.join(output_folder, "must-gather"))
            config_etc_hosts(cluster['name'], cluster['base_dns_domain'],
                             helper_cluster.get_api_vip_from_cluster(client, cluster, pull_secret))
            download_must_gather(kubeconfig_path, os.path.join(output_folder, "must-gather"))
    finally:
        run_command(f"chmod -R ugo+rx '{output_folder}'")
def day2_nodes_flow(client, terraform_cluster_dir_prefix, tf_folder, cluster, has_ipv_6,
                    num_worker_nodes, api_vip_ip, api_vip_dnsname, install_cluster_flag,
                    day2_type_flag, with_static_network_config, base_cluster_name):
    tf_network_name, total_num_nodes = get_network_num_nodes_from_tf(tf_folder)

    with utils.file_lock_context():
        utils.run_command(f'make _apply_terraform CLUSTER_NAME={terraform_cluster_dir_prefix}')
    time.sleep(5)

    if day2_type_flag == "ocp":
        num_nodes_to_wait = total_num_nodes
        installed_status = consts.NodesStatus.INSTALLED
    else:
        num_nodes_to_wait = num_worker_nodes
        installed_status = consts.NodesStatus.DAY2_INSTALLED

    utils.wait_till_nodes_are_ready(nodes_count=num_nodes_to_wait, network_name=tf_network_name)

    waiting.wait(
        lambda: utils.are_libvirt_nodes_in_cluster_hosts(client, cluster.id, num_nodes_to_wait),
        timeout_seconds=consts.NODES_REGISTERED_TIMEOUT,
        sleep_seconds=10,
        waiting_for="Nodes to be registered in inventory service",
    )

    set_nodes_hostnames_if_needed(client, tf_folder, with_static_network_config,
                                  has_ipv_6, tf_network_name, cluster.id)

    utils.wait_till_all_hosts_are_in_status(
        client=client,
        cluster_id=cluster.id,
        nodes_count=num_worker_nodes,
        statuses=[consts.NodesStatus.KNOWN],
        interval=30,
    )

    if install_cluster_flag:
        log.info("Start installing all known nodes in the cluster %s", cluster.id)
        kubeconfig = utils.get_kubeconfig_path(base_cluster_name)
        ocp_orig_ready_nodes = get_ocp_cluster_ready_nodes_num(kubeconfig)
        hosts = client.get_cluster_hosts(cluster.id)
        for host in hosts:
            if host["status"] == 'known':
                client.install_day2_host(cluster.id, host['id'])

        log.info("Start waiting until all nodes of cluster %s have been installed "
                 "(reached added-to-existing-cluster state)", cluster.id)
        utils.wait_till_all_hosts_are_in_status(
            client=client,
            cluster_id=cluster.id,
            nodes_count=num_nodes_to_wait,
            statuses=[installed_status],
            interval=30,
        )

        log.info("Start waiting until installed nodes have actually been added to the OCP cluster")
        waiting.wait(
            lambda: wait_nodes_join_ocp_cluster(ocp_orig_ready_nodes, num_worker_nodes,
                                                day2_type_flag, kubeconfig),
            timeout_seconds=consts.NODES_REGISTERED_TIMEOUT,
            sleep_seconds=30,
            waiting_for="Day2 nodes to be added to OCP cluster",
            expected_exceptions=Exception,
        )
        log.info("%d worker nodes were successfully added to OCP cluster", num_worker_nodes)
def _delete_rule(cls, rule_suffix):
    """ Delete an existing rule """
    delete_rule = cls._build_rule_string('delete', rule_suffix)
    logging.info("Delete rule \"%s\"", delete_rule)
    run_command(delete_rule, shell=True)
def _insert_rule(cls, rule_suffix):
    """ Insert a new rule """
    insert_rule = cls._build_rule_string('insert', rule_suffix)
    logging.info("Adding rule \"%s\"", insert_rule)
    run_command(insert_rule, shell=True)
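# Illustrative sketch, not part of the source: the check/insert/delete helpers above
# are naturally combined into an idempotent "add" so repeated test setups do not
# duplicate iptables rules. The method name below is assumed for illustration only;
# it relies solely on _does_rule_exist() and _insert_rule() as defined above.
@classmethod
def _insert_rule_if_missing(cls, rule_suffix):
    """Insert the rule only when an identical rule is not already present."""
    if not cls._does_rule_exist(rule_suffix):
        cls._insert_rule(rule_suffix)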
def _get_default_interfaces():
    """ Find all interfaces that have a default route on them. Usually it is a single interface. """
    interfaces, _, _ = run_command(r"ip -4 route | egrep '^default ' | awk '{print $5}'", shell=True)
    return set(interfaces.strip().split())
def create_disk(disk_path, disk_size):
    command = f'qemu-img create -f qcow2 {disk_path} {disk_size}'
    utils.run_command(command, shell=True)
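# Illustrative usage (assumed, not from the source): disk_size is passed straight to
# qemu-img, so either a byte count or a human-readable size string works, e.g.:
#     create_disk("/tmp/extra-disk.qcow2", "20G")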
def __init__(self, **kwargs):
    self.libvirt_connection = libvirt.open('qemu:///system')
    self.private_ssh_key_path = kwargs.get("private_ssh_key_path")
    self._setup_timestamp = utils.run_command("date +\"%Y-%m-%d %T\"")[0]
def undefine_interface(self, node_name, mac):
    logging.info(f"Undefining an interface mac: {mac}, for node: {node_name}")
    command = f"virsh detach-interface {node_name} --type network --mac {mac}"
    utils.run_command(command, True)
    logging.info("Successfully removed interface.")
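# Illustrative pairing (assumed, not from the source): add_interface() above returns
# the MAC of the newly attached NIC, which is exactly what undefine_interface() needs
# for cleanup. The "controller" and argument values here are hypothetical:
#     mac = controller.add_interface("master-0", "secondary-net", "vnet10")
#     ...
#     controller.undefine_interface("master-0", mac)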