def nodes(self, setup_node_controller, request):
    """Pytest fixture: provision nodes (optionally NAT'd), yield them, tear down.

    Node parameters come from ``request.param`` overrides when present,
    otherwise from the module-level ``env_variables``. NAT is configured
    only for the "none" platform. Teardown destroys the nodes when
    ``env_variables['test_teardown']`` is set; network assets are always
    released on non-QE environments.
    """
    if hasattr(request, 'param'):
        node_vars = self.override_node_parameters(**request.param)
    else:
        node_vars = env_variables

    net_asset = None
    # The "none" platform has no load balancer, so NAT rules are needed.
    needs_nat = node_vars.get("platform") == consts.Platforms.NONE
    try:
        if not qe_env:
            net_asset = NetworkAssets()
            node_vars["net_asset"] = net_asset.get()

        controller = setup_node_controller(**node_vars)
        nodes = Nodes(controller, node_vars["private_ssh_key_path"])
        nodes.prepare_nodes()
        if needs_nat:
            nodes.configure_nat()

        yield nodes

        if env_variables['test_teardown']:
            logging.info('--- TEARDOWN --- node controller\n')
            nodes.destroy_all_nodes()
            if needs_nat:
                nodes.unconfigure_nat()
    finally:
        # BUGFIX: guard against net_asset still being None — if NetworkAssets()
        # itself raised, the unguarded release_all() call would raise
        # AttributeError here and mask the original exception.
        if not qe_env and net_asset is not None:
            net_asset.release_all()
def get_nodes_func(tf_config: BaseTerraformConfig, infraenv_config: InfraEnvConfig):
    """Provision (or return the memoized) Nodes for the given infra-env configs.

    All intermediate assets are recorded in the enclosing ``nodes_data``
    dict so repeated calls reuse the same Nodes instance and so teardown
    code can find the network asset and NAT controller later.
    """
    if "nodes" in nodes_data:
        # Already provisioned in this session — reuse the cached instance.
        return nodes_data["nodes"]

    nodes_data["configs"] = infraenv_config, tf_config

    # Lease a libvirt network asset and attach it to the terraform config.
    asset = LibvirtNetworkAssets()
    tf_config.net_asset = asset.get()
    nodes_data["net_asset"] = asset

    tf_controller = TerraformController(tf_config, entity_config=infraenv_config)
    node_set = Nodes(tf_controller)
    nodes_data["nodes"] = node_set
    node_set.prepare_nodes()

    # NAT the terraform-created interfaces so nodes have outbound access.
    nat_ifaces = BaseTest.nat_interfaces(tf_config)
    nat_ctrl = NatController(
        nat_ifaces, NatController.get_namespace_index(nat_ifaces[0]))
    nat_ctrl.add_nat_rules()
    nodes_data["nat"] = nat_ctrl

    return node_set
def nodes(self, setup_node_controller):
    """Fixture: wrap the controller in Nodes with the boot order corrected
    (without starting the VMs); on teardown shut everything down and wipe disks.
    """
    node_set = Nodes(setup_node_controller, env_variables["private_ssh_key_path"])
    node_set.set_correct_boot_order(start_nodes=False)
    yield node_set
    # Teardown: power off and scrub disks so the next test starts clean.
    node_set.shutdown_all()
    node_set.format_all_disks()
def get_nodes_func(config: Optional[TerraformConfig] = None):
    """Provision (or return the memoized) Nodes for *config*.

    A fresh ``TerraformConfig`` is built per call when none is given.
    """
    # BUGFIX: the previous default `config=TerraformConfig()` was evaluated
    # once at definition time and then shared (and mutated via
    # `config.net_asset = ...`) across every call — the classic mutable
    # default-argument pitfall. Build a fresh config per call instead,
    # matching the pattern this codebase already uses elsewhere.
    if config is None:
        config = TerraformConfig()

    if "nodes" in nodes_data:
        return nodes_data["nodes"]

    net_asset = NetworkAssets()
    config.net_asset = net_asset.get()
    nodes = Nodes(TerraformController(config),
                  config.private_ssh_key_path,
                  net_asset,
                  config.platform == consts.Platforms.NONE)
    nodes.prepare_nodes()
    nodes_data["nodes"] = nodes
    return nodes
def prepare_nodes(self, nodes: Nodes, cluster_configuration: ClusterConfig) -> Nodes:
    """Fixture: prepare the nodes and yield them; on teardown (when enabled)
    destroy all nodes and remove the downloaded ISO file.
    """
    try:
        nodes.prepare_nodes()
        yield nodes
    finally:
        if global_variables.test_teardown:
            logging.info('--- TEARDOWN --- node controller\n')
            nodes.destroy_all_nodes()
            iso_path = cluster_configuration.iso_download_path
            logging.info(f'--- TEARDOWN --- deleting iso file from: {iso_path}\n')
            infra_utils.run_command(f"rm -f {iso_path}", shell=True)
def _collect_virsh_logs(cls, nodes: Nodes, log_dir_name):
    """Collect virsh/libvirt diagnostics into ``log_dir_name``/libvirt_logs.

    Gathers: `virsh list`, `virsh net-list`, DHCP leases for the cluster
    network, /var/log/messages, per-node qemu and console logs, and the
    libvirtd journal since ``nodes.setup_time``.
    """
    logging.info('Collecting virsh logs\n')
    os.makedirs(log_dir_name, exist_ok=True)
    virsh_log_path = os.path.join(log_dir_name, "libvirt_logs")
    # exist_ok=False: a pre-existing dir means logs were already collected.
    os.makedirs(virsh_log_path, exist_ok=False)

    libvirt_list_path = os.path.join(virsh_log_path, "virsh_list")
    infra_utils.run_command(f"virsh list --all >> {libvirt_list_path}", shell=True)

    libvirt_net_list_path = os.path.join(virsh_log_path, "virsh_net_list")
    infra_utils.run_command(f"virsh net-list --all >> {libvirt_net_list_path}", shell=True)

    network_name = nodes.get_cluster_network()
    virsh_leases_path = os.path.join(virsh_log_path, "net_dhcp_leases")
    infra_utils.run_command(f"virsh net-dhcp-leases {network_name} >> {virsh_leases_path}", shell=True)

    # BUGFIX: the copies below crashed the whole collection when a log file
    # was absent; guard each with FileNotFoundError (consistent with the
    # hardened variant of this function elsewhere in the codebase) so one
    # missing file no longer aborts best-effort log gathering.
    messages_log_path = os.path.join(virsh_log_path, "messages.log")
    try:
        shutil.copy('/var/log/messages', messages_log_path)
    except FileNotFoundError:
        logging.warning("Failed to copy /var/log/messages, file does not exist")

    qemu_libvirt_path = os.path.join(virsh_log_path, "qemu_libvirt_logs")
    os.makedirs(qemu_libvirt_path, exist_ok=False)
    for node in nodes:
        try:
            shutil.copy(f'/var/log/libvirt/qemu/{node.name}.log',
                        f'{qemu_libvirt_path}/{node.name}-qemu.log')
        except FileNotFoundError:
            logging.warning(f"Failed to copy {node.name} qemu log, file does not exist")

    console_log_path = os.path.join(virsh_log_path, "console_logs")
    os.makedirs(console_log_path, exist_ok=False)
    for node in nodes:
        try:
            shutil.copy(f'/var/log/libvirt/qemu/{node.name}-console.log',
                        f'{console_log_path}/{node.name}-console.log')
        except FileNotFoundError:
            logging.warning(f"Failed to copy {node.name} console log, file does not exist")

    libvird_log_path = os.path.join(virsh_log_path, "libvirtd_journal")
    infra_utils.run_command(f"journalctl --since \"{nodes.setup_time}\" "
                            f"-u libvirtd -D /run/log/journal >> {libvird_log_path}",
                            shell=True)
def prepare_infraenv_nodes(
        self, infraenv_nodes: Nodes,
        infra_env_configuration: InfraEnvConfig) -> Nodes:
    """Fixture: prepare infra-env nodes and yield them; on teardown (when
    enabled) destroy all nodes and remove the downloaded ISO file.
    """
    try:
        infraenv_nodes.prepare_nodes()
        yield infraenv_nodes
    finally:
        if global_variables.test_teardown:
            logging.info("--- TEARDOWN --- node controller\n")
            infraenv_nodes.destroy_all_nodes()
            iso_path = infra_env_configuration.iso_download_path
            logging.info(f"--- TEARDOWN --- deleting iso file from: {iso_path}\n")
            infra_utils.run_command(f"rm -f {iso_path}", shell=True)
def waiting_for_installation_completion(controller):
    """Wait (up to 1h) for the cluster installation to complete.

    Configures /etc/hosts so the API VIP resolves, then polls until all
    operators are up. Diagnostics (sosreport, installer-gather, must-gather)
    are collected in the ``finally`` block whether or not the wait succeeded.
    """
    api_vip = controller.master_ips[0][0]
    try:
        logging.info("Configuring /etc/hosts...")
        utils.config_etc_hosts(
            cluster_name=controller.cluster_name,
            base_dns_domain=controller.cluster_domain,
            api_vip=api_vip,
        )

        logging.info("Waiting for installation to complete...")
        waiting.wait(
            all_operators_up,
            sleep_seconds=20,
            timeout_seconds=60 * 60,
            waiting_for="all operators to get up",
        )
        logging.info("Installation completed successfully!")
    finally:
        logging.info("Gathering sosreport data from host...")
        master_node = Nodes(controller, private_ssh_key_path=SSH_KEY)[0]
        gather_sosreport_data(master_node)

        logging.info("Gathering information via installer-gather...")
        utils.recreate_folder(INSTALLER_GATHER_DIR, force_recreate=True)
        installer_gather(ip=api_vip, ssh_key=SSH_KEY, out_dir=INSTALLER_GATHER_DIR)

        logging.info("Gathering information via must-gather...")
        utils.recreate_folder(MUST_GATHER_DIR)
        download_must_gather(KUBE_CONFIG, MUST_GATHER_DIR)
def log_collection(controller, vm_ip):
    """Best-effort log collection: sosreport, installer-gather, must-gather.

    Each collector runs in its own try/except so one failure does not
    prevent the others. The log message reflects whether an exception is
    currently being handled (i.e. installation failed) via sys.exc_info().
    """
    etype, _value, _tb = sys.exc_info()
    outcome = ('failed', 'successful')[etype is None]
    logging.info(f"Collecting logs after a {outcome} installation")

    try:
        logging.info("Gathering sosreport data from host...")
        node = Nodes(controller, private_ssh_key_path=SSH_KEY)[0]
        gather_sosreport_data(node)
    except Exception:
        logging.exception("sosreport gathering failed!")
        # NOTE(review): utils.retry() is invoked with no arguments and its
        # result is discarded — this looks like a leftover; confirm intent.
        utils.retry()

    try:
        logging.info("Gathering information via installer-gather...")
        utils.recreate_folder(INSTALLER_GATHER_DIR, force_recreate=True)
        installer_gather(ip=vm_ip, ssh_key=SSH_KEY, out_dir=INSTALLER_GATHER_DIR)
    except Exception:
        logging.exception("installer-gather failed!")

    try:
        logging.info("Gathering information via must-gather...")
        utils.recreate_folder(MUST_GATHER_DIR)
        download_must_gather(KUBE_CONFIG, MUST_GATHER_DIR)
    except Exception:
        logging.exception("must-gather failed!")
def kube_api_test_prepare_late_binding_infraenv(
    kube_api_context, nodes: Nodes, infraenv_config: InfraEnvConfig, *, is_ipv4=True
):
    """Create a late-binding InfraEnv, boot nodes from its ISO, approve the
    discovered agents, and return the InfraEnv once agents are install-ready.
    """
    infraenv_name = infraenv_config.entity_name.get()

    pull_secret = Secret(
        kube_api_client=kube_api_context.api_client,
        name=f"{infraenv_name}-secret",
        namespace=global_variables.spoke_namespace,
    )
    pull_secret.create(pull_secret=infraenv_config.pull_secret)

    ignition_config_override = None

    infra_env = InfraEnv(
        kube_api_client=kube_api_context.api_client,
        name=f"{infraenv_name}-infra-env",
        namespace=global_variables.spoke_namespace,
    )
    # Late binding: no cluster_deployment is attached at creation time.
    infra_env.create(
        cluster_deployment=None,
        ignition_config_override=ignition_config_override,
        secret=pull_secret,
        proxy=None,
        ssh_pub_key=infraenv_config.ssh_public_key,
    )
    infra_env.status()
    download_iso_from_infra_env(infra_env, infraenv_config.iso_download_path)

    logger.info("iso downloaded, starting nodes")
    nodes.start_all()

    logger.info("waiting for host agent")
    agents = infra_env.wait_for_agents(len(nodes))
    for agent in agents:
        agent.approve()
        set_agent_hostname(nodes[0], agent, is_ipv4)  # Currently only supports single node

    logger.info("Waiting for agent status verification")
    Agent.wait_for_agents_to_be_ready_for_install(agents)
    return infra_env
def _collect_virsh_logs(cls, nodes: Nodes, log_dir_name):
    """Collect virsh/libvirt diagnostics into ``log_dir_name``/libvirt_logs.

    Gathers: `virsh list`, `virsh net-list`, DHCP leases for the cluster
    network, /var/log/messages, per-node qemu and console logs, and the
    libvirtd journal since ``nodes.setup_time``. Missing log files are
    logged as warnings rather than aborting the collection.
    """
    logging.info("Collecting virsh logs\n")
    os.makedirs(log_dir_name, exist_ok=True)
    virsh_log_path = os.path.join(log_dir_name, "libvirt_logs")
    # exist_ok=False: a pre-existing libvirt_logs dir indicates a double
    # collection — fail loudly rather than mix runs.
    os.makedirs(virsh_log_path, exist_ok=False)

    # Snapshot of all defined domains.
    libvirt_list_path = os.path.join(virsh_log_path, "virsh_list")
    infra_utils.run_command(f"virsh list --all >> {libvirt_list_path}",
                            shell=True)

    # Snapshot of all libvirt networks.
    libvirt_net_list_path = os.path.join(virsh_log_path, "virsh_net_list")
    infra_utils.run_command(
        f"virsh net-list --all >> {libvirt_net_list_path}", shell=True)

    # DHCP leases on the cluster's own network.
    network_name = nodes.get_cluster_network()
    virsh_leases_path = os.path.join(virsh_log_path, "net_dhcp_leases")
    infra_utils.run_command(
        f"virsh net-dhcp-leases {network_name} >> {virsh_leases_path}",
        shell=True)

    messages_log_path = os.path.join(virsh_log_path, "messages.log")
    try:
        shutil.copy("/var/log/messages", messages_log_path)
    except FileNotFoundError:
        # Best-effort: /var/log/messages may not exist on all distros.
        logging.warning(
            "Failed to copy /var/log/messages, file does not exist")

    # Per-node qemu logs.
    qemu_libvirt_path = os.path.join(virsh_log_path, "qemu_libvirt_logs")
    os.makedirs(qemu_libvirt_path, exist_ok=False)
    for node in nodes:
        try:
            shutil.copy(f"/var/log/libvirt/qemu/{node.name}.log",
                        f"{qemu_libvirt_path}/{node.name}-qemu.log")
        except FileNotFoundError:
            logging.warning(
                f"Failed to copy {node.name} qemu log, file does not exist"
            )

    # Per-node serial-console logs.
    console_log_path = os.path.join(virsh_log_path, "console_logs")
    os.makedirs(console_log_path, exist_ok=False)
    for node in nodes:
        try:
            shutil.copy(f"/var/log/libvirt/qemu/{node.name}-console.log",
                        f"{console_log_path}/{node.name}-console.log")
        except FileNotFoundError:
            logging.warning(
                f"Failed to copy {node.name} console log, file does not exist"
            )

    # libvirtd journal since the nodes were set up.
    libvird_log_path = os.path.join(virsh_log_path, "libvirtd_journal")
    infra_utils.run_command(
        f'journalctl --since "{nodes.setup_time}" '
        f"-u libvirtd -D /run/log/journal >> {libvird_log_path}",
        shell=True,
    )
def get_nodes_func(tf_config: TerraformConfig, cluster_config: ClusterConfig):
    """Provision (or return the memoized) Nodes for the given cluster configs.

    Intermediate assets are recorded in the enclosing ``nodes_data`` dict
    so repeated calls reuse the same Nodes instance.
    """
    if "nodes" in nodes_data:
        # Already provisioned — reuse the cached instance.
        return nodes_data["nodes"]

    nodes_data["configs"] = cluster_config, tf_config

    # Lease a libvirt network asset and attach it to the terraform config.
    asset = LibvirtNetworkAssets()
    tf_config.net_asset = asset.get()
    nodes_data["net_asset"] = asset

    tf_controller = TerraformController(tf_config, cluster_config=cluster_config)
    node_set = Nodes(tf_controller, tf_config.private_ssh_key_path)
    nodes_data["nodes"] = node_set
    node_set.prepare_nodes()

    # NAT the terraform-created interfaces so nodes have outbound access.
    nat_ifaces = BaseTest.nat_interfaces(tf_config)
    nat_ctrl = NatController(
        nat_ifaces, NatController.get_namespace_index(nat_ifaces[0]))
    nat_ctrl.add_nat_rules()
    nodes_data["nat"] = nat_ctrl

    return node_set
def get_nodes_func(config: Optional[TerraformConfig] = None):
    """Provision (or return the memoized) Nodes for *config*.

    Builds a fresh TerraformConfig when none is supplied; caches the nodes,
    config, network asset, and NAT controller in ``nodes_data``.
    """
    if not config:
        config = TerraformConfig()

    if "nodes" in nodes_data:
        return nodes_data["nodes"]

    # Lease a libvirt network asset and attach it to the config.
    asset = LibvirtNetworkAssets()
    config.net_asset = asset.get()

    node_set = Nodes(TerraformController(config), config.private_ssh_key_path)
    node_set.prepare_nodes()

    # NAT the terraform-created interfaces so nodes have outbound access.
    ifaces = BaseTest.nat_interfaces(config)
    nat_ctrl = NatController(ifaces, NatController.get_namespace_index(ifaces[0]))
    nat_ctrl.add_nat_rules()

    nodes_data["nodes"] = node_set
    nodes_data["config"] = config
    nodes_data["net_asset"] = asset
    nodes_data["nat"] = nat_ctrl
    return node_set
def get_nodes_func(config: Optional[TerraformConfig] = None):
    """Provision (or return the memoized) Nodes for *config*, configuring
    NAT when the platform is "none".

    A fresh ``TerraformConfig`` is built per call when none is given.
    """
    # BUGFIX: the previous default `config=TerraformConfig()` was evaluated
    # once at definition time and then shared (and mutated via
    # `config.net_asset = ...`) across every call — the classic mutable
    # default-argument pitfall. Build a fresh config per call instead,
    # matching the pattern this codebase already uses elsewhere.
    if config is None:
        config = TerraformConfig()

    if "nodes" in nodes_data:
        return nodes_data["nodes"]

    # The "none" platform has no load balancer, so NAT rules are needed.
    nodes_data["needs_nat"] = config.platform == consts.Platforms.NONE
    nodes_data["net_asset"] = NetworkAssets()
    config.net_asset = nodes_data["net_asset"].get()

    nodes = Nodes(TerraformController(config), config.private_ssh_key_path)
    nodes.prepare_nodes()
    if nodes_data["needs_nat"]:
        nodes.configure_nat()
    nodes_data["nodes"] = nodes
    return nodes
def nodes(self, setup_node_controller):
    """Pytest fixture: provision nodes from ``env_variables``, yield them,
    destroy them afterwards; always release network assets on non-QE envs.
    """
    net_asset = None
    try:
        if not qe_env:
            net_asset = NetworkAssets()
            env_variables["net_asset"] = net_asset.get()

        controller = setup_node_controller(**env_variables)
        nodes = Nodes(controller, env_variables["private_ssh_key_path"])
        nodes.prepare_nodes()

        yield nodes

        # Dropped the pointless f-prefix on this constant message.
        logging.info('--- TEARDOWN --- node controller\n')
        nodes.destroy_all_nodes()
    finally:
        # BUGFIX: guard against net_asset still being None — if NetworkAssets()
        # itself raised, the unguarded release_all() call would raise
        # AttributeError here and mask the original exception.
        if not qe_env and net_asset is not None:
            net_asset.release_all()
def nodes(self, setup_node_controller, request):
    """Pytest fixture: provision nodes (parameters overridable via
    ``request.param``), yield them, destroy them afterwards; always release
    network assets on non-QE environments.
    """
    if hasattr(request, 'param'):
        node_vars = self.override_node_parameters(**request.param)
    else:
        node_vars = env_variables

    net_asset = None
    try:
        if not qe_env:
            net_asset = NetworkAssets()
            node_vars["net_asset"] = net_asset.get()

        controller = setup_node_controller(**node_vars)
        nodes = Nodes(controller, node_vars["private_ssh_key_path"])
        nodes.prepare_nodes()

        yield nodes

        logging.info('--- TEARDOWN --- node controller\n')
        nodes.destroy_all_nodes()
    finally:
        # BUGFIX: guard against net_asset still being None — if NetworkAssets()
        # itself raised, the unguarded release_all() call would raise
        # AttributeError here and mask the original exception.
        if not qe_env and net_asset is not None:
            net_asset.release_all()
def nodes(self, controller: NodeController) -> Nodes:
    """Fixture: expose the given node controller wrapped in a Nodes collection."""
    node_set = Nodes(controller)
    return node_set
def infraenv_nodes(self, infraenv_controller: NodeController) -> Nodes:
    """Fixture: expose the infra-env node controller wrapped in a Nodes collection."""
    node_set = Nodes(infraenv_controller)
    return node_set
def kube_api_test(
    kube_api_context,
    nodes: Nodes,
    cluster_config: ClusterConfig,
    proxy_server=None,
    *,
    is_ipv4=True,
    is_disconnected=False,
):
    """End-to-end kube-api install flow: create the CRs (AgentClusterInstall,
    Secret, ClusterDeployment, InfraEnv), boot nodes from the discovery ISO,
    approve agents, and wait for the installation to complete.

    On installation failure, debug info is collected from the cluster and the
    exception is logged (not re-raised here).
    """
    cluster_name = cluster_config.cluster_name.get()
    # TODO resolve it from the service if the node controller doesn't have this information
    # (please see cluster.get_primary_machine_cidr())
    machine_cidr = nodes.controller.get_primary_machine_cidr()

    agent_cluster_install = AgentClusterInstall(
        kube_api_client=kube_api_context.api_client,
        name=f"{cluster_name}-agent-cluster-install",
        namespace=global_variables.spoke_namespace,
    )

    # Pull-secret for the spoke cluster.
    secret = Secret(
        kube_api_client=kube_api_context.api_client,
        name=f"{cluster_name}-secret",
        namespace=global_variables.spoke_namespace,
    )
    secret.create(pull_secret=cluster_config.pull_secret)

    cluster_deployment = ClusterDeployment(
        kube_api_client=kube_api_context.api_client,
        name=cluster_name,
        namespace=global_variables.spoke_namespace,
    )
    cluster_deployment.create(
        agent_cluster_install_ref=agent_cluster_install.ref,
        secret=secret,
    )

    agent_cluster_install.create(
        cluster_deployment_ref=cluster_deployment.ref,
        image_set_ref=deploy_image_set(cluster_name, kube_api_context),
        cluster_cidr=cluster_config.cluster_networks[0].cidr,
        host_prefix=cluster_config.cluster_networks[0].host_prefix,
        service_network=cluster_config.service_networks[0].cidr,
        ssh_pub_key=cluster_config.ssh_public_key,
        hyperthreading=cluster_config.hyperthreading,
        control_plane_agents=nodes.controller.params.master_count,
        worker_agents=nodes.controller.params.worker_count,
        machine_cidr=machine_cidr,
    )
    agent_cluster_install.wait_to_be_ready(False)

    if is_disconnected:
        # Disconnected installs need the hub's CA bundle injected into both
        # the install config and the discovery ignition.
        logger.info("getting igntion and install config override for disconected install")
        ca_bundle = get_ca_bundle_from_hub()
        patch_install_config_with_ca_bundle(cluster_deployment, ca_bundle)
        ignition_config_override = get_ignition_config_override(ca_bundle)
    else:
        ignition_config_override = None

    proxy = setup_proxy(cluster_config, machine_cidr, cluster_name, proxy_server)

    infra_env = InfraEnv(
        kube_api_client=kube_api_context.api_client,
        name=f"{cluster_name}-infra-env",
        namespace=global_variables.spoke_namespace,
    )
    infra_env.create(
        cluster_deployment=cluster_deployment,
        ignition_config_override=ignition_config_override,
        secret=secret,
        proxy=proxy,
        ssh_pub_key=cluster_config.ssh_public_key,
    )
    infra_env.status()
    download_iso_from_infra_env(infra_env, cluster_config.iso_download_path)

    logger.info("iso downloaded, starting nodes")
    nodes.start_all()

    logger.info("waiting for host agent")
    agents = cluster_deployment.wait_for_agents(len(nodes))
    for agent in agents:
        agent.approve()
        set_agent_hostname(nodes[0], agent, is_ipv4)  # Currently only supports single node

    # Single-node installs additionally need the node IP pinned on the CRs.
    if len(nodes) == 1:
        set_single_node_ip(cluster_deployment, nodes, is_ipv4)

    logger.info("Waiting for agent status verification")
    Agent.wait_for_agents_to_install(agents)
    agent_cluster_install.wait_to_be_ready(True)

    logger.info("waiting for agent-cluster-install to be in installing state")
    agent_cluster_install.wait_to_be_installing()

    try:
        logger.info("installation started, waiting for completion")
        agent_cluster_install.wait_to_be_installed()
        logger.info("installation completed successfully")
    except Exception:
        # Collect debug info but swallow the failure here; callers see logs.
        logger.exception("Failure during kube-api installation flow:")
        collect_debug_info_from_cluster(cluster_deployment, agent_cluster_install)