Exemplo n.º 1
0
    def waiting_for_installation_completion(
            self,
            controller: NodeController,
            cluster_configuration: ClusterConfig,
            skip_logs=False):
        """Wait until all operators are available, then collect logs.

        The first address of the first entry in ``controller.master_ips`` is
        passed to ``utils.config_etc_hosts`` as the API VIP and is later handed
        to ``self.log_collection``.

        Args:
            controller: Node controller exposing ``master_ips``.
            cluster_configuration: Supplies ``cluster_name`` (read through
                ``.get()``) and ``base_dns_domain``.
            skip_logs: When true, logs are not collected after a successful
                wait. Logs are always collected when the wait fails.

        Raises:
            Exception: Whatever the hosts configuration or the wait raised,
                re-raised after being logged and after log collection ran.
        """
        first_master_ip = controller.master_ips[0][0]

        try:
            log.info("Configuring /etc/hosts...")
            utils.config_etc_hosts(
                cluster_name=cluster_configuration.cluster_name.get(),
                base_dns_domain=cluster_configuration.base_dns_domain,
                api_vip=first_master_ip,
            )

            log.info("Waiting for installation to complete...")
            wait_options = {
                "sleep_seconds": 20,
                "timeout_seconds": 60 * 60,
                "waiting_for": "all operators to get up",
            }
            waiting.wait(self.all_operators_available, **wait_options)
            log.info("Installation completed successfully!")
        except Exception:
            log.exception(
                "An unexpected error has occurred while waiting for installation to complete"
            )
            # A failed installation is exactly when the logs matter most, so
            # gather them regardless of skip_logs before propagating.
            self.log_collection(first_master_ip)
            raise

        # Reaching this point means the wait succeeded; honour the caller's
        # request to skip log collection.
        if skip_logs:
            return
        self.log_collection(first_master_ip)
Exemplo n.º 2
0
 def update_oc_config(nodes, cluster):
     """Export the cluster kubeconfig and configure hosts for its API address.

     Sets the ``KUBECONFIG`` environment variable to
     ``cluster.kubeconfig_path``, resolves the API address and passes it to
     ``utils.config_etc_hosts`` together with the cluster name and the global
     base DNS domain.

     Args:
         nodes: Exposes ``masters_count`` and ``controller``.
         cluster: Exposes ``kubeconfig_path``, ``name``, ``id``,
             ``api_client`` and the CIDR / single-node IP helpers.
     """
     os.environ["KUBECONFIG"] = cluster.kubeconfig_path

     has_single_master = nodes.masters_count == 1
     if not has_single_master:
         api_vip = nodes.controller.get_ingress_and_api_vips()["api_vip"]
     else:
         # With exactly one master the node's own address, looked up through
         # the primary machine CIDR, is used instead of a VIP.
         machine_cidr = cluster.get_primary_machine_cidr()
         api_vip = cluster.get_ip_for_single_node(
             cluster.api_client, cluster.id, machine_cidr)

     utils.config_etc_hosts(
         cluster_name=cluster.name,
         base_dns_domain=global_variables.base_dns_domain,
         api_vip=api_vip,
     )
def waiting_for_installation_completion(controller):
    """Wait until all operators are up and always collect logs afterwards.

    Args:
        controller: Exposes ``master_ips``, ``cluster_name`` and
            ``cluster_domain``. The first address of the first master entry is
            used as the API VIP and as the log-collection target.

    Any exception raised while configuring hosts or waiting propagates to the
    caller after ``log_collection`` has run.
    """
    first_master_ip = controller.master_ips[0][0]

    try:
        logging.info("Configuring /etc/hosts...")
        utils.config_etc_hosts(
            cluster_name=controller.cluster_name,
            base_dns_domain=controller.cluster_domain,
            api_vip=first_master_ip,
        )

        logging.info("Waiting for installation to complete...")
        wait_options = {
            "sleep_seconds": 20,
            "timeout_seconds": 60 * 60,
            "waiting_for": "all operators to get up",
        }
        waiting.wait(all_operators_up, **wait_options)
        logging.info("Installation completed successfully!")
    finally:
        # Logs are gathered on success and on failure alike.
        log_collection(first_master_ip)
Exemplo n.º 4
0
def _must_gather_kube_api(cluster_name, cluster_deployment,
                          agent_cluster_install, output_folder):
    """Download the kubeconfig of a kube-api cluster and run must-gather.

    The AgentClusterInstall spec is fetched once and reused both for deciding
    whether the cluster is single-node and for reading the API VIP, so the two
    values always come from the same snapshot.

    Args:
        cluster_name: Name used for the kubeconfig file name and passed to
            ``config_etc_hosts``.
        cluster_deployment: Object whose ``get()["spec"]["baseDomain"]``
            provides the base domain; also passed to
            ``get_ip_for_single_node``.
        agent_cluster_install: Object providing ``get_spec()`` and
            ``download_kubeconfig()``.
        output_folder: Directory that receives
            ``kubeconfig/<cluster_name>_kubeconfig.yaml`` and is passed to
            ``download_must_gather``.

    Raises:
        KeyError: If the spec lacks the networking / ``apiVIP`` entries that
            the selected branch reads.
    """
    kubeconfig_path = os.path.join(output_folder, "kubeconfig",
                                   f"{cluster_name}_kubeconfig.yaml")
    agent_spec = agent_cluster_install.get_spec()
    agent_cluster_install.download_kubeconfig(kubeconfig_path=kubeconfig_path)
    log.info("Agent cluster install spec %s", agent_spec)

    # A missing controlPlaneAgents entry is treated as 3 (multi-node).
    control_plane_agents = agent_spec.get("provisionRequirements", {}).get(
        "controlPlaneAgents", 3)

    # in case of single node we should set node ip and not vip
    if control_plane_agents == 1:
        machine_cidr = agent_spec["networking"]["machineNetwork"][0]["cidr"]
        kube_api_ip = get_ip_for_single_node(cluster_deployment,
                                             is_cidr_is_ipv4(machine_cidr))
    else:
        # Reuse the spec fetched above instead of issuing a second get_spec().
        kube_api_ip = agent_spec["apiVIP"]

    config_etc_hosts(
        cluster_name,
        cluster_deployment.get()["spec"]["baseDomain"],
        kube_api_ip,
    )
    download_must_gather(kubeconfig_path, output_folder)
Exemplo n.º 5
0
def download_logs(
    client: InventoryClient,
    cluster: dict,
    dest: str,
    must_gather: bool,
    update_by_events: bool = False,
    retry_interval: int = RETRY_INTERVAL,
):
    """Download every available artifact of a cluster into its logs folder.

    The output folder is recreated from scratch and then filled with the
    metadata file, metrics, cluster files, manifests, per-host ignitions,
    infra-env and cluster events, the cluster logs tar and the no-ingress
    kubeconfig. Most steps run inside ``SuppressAndLog`` so that a failure of
    one download does not prevent the remaining ones.

    Args:
        client: Inventory client used for all downloads.
        cluster: Cluster dictionary. Its ``hosts`` entry is filled in from the
            service when it is missing or empty.
        dest: Destination passed to ``get_logs_output_folder``.
        must_gather: When true, and the kubeconfig download did not raise,
            hosts are configured for the API VIP and must-gather is run.
        update_by_events: Forwarded to ``is_update_needed``.
        retry_interval: Seconds to sleep between cluster-logs verification
            attempts.
    """
    # ``not cluster.get(...)`` also covers a ``None`` hosts entry, which the
    # previous ``len(...) == 0`` check would have crashed on.
    if not cluster.get("hosts"):
        cluster["hosts"] = client.get_cluster_hosts(cluster_id=cluster["id"])

    output_folder = get_logs_output_folder(dest, cluster)
    if not is_update_needed(output_folder, update_by_events, client, cluster):
        log.info(f"Skipping, no need to update {output_folder}.")
        return

    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))

    try:
        write_metadata_file(client, cluster,
                            os.path.join(output_folder, "metadata.json"))

        with SuppressAndLog(requests.exceptions.RequestException,
                            ConnectionError, KeyboardInterrupt):
            client.download_metrics(os.path.join(output_folder, "metrics.txt"))

        for cluster_file in (
                "bootstrap.ign",
                "master.ign",
                "worker.ign",
                "install-config.yaml",
        ):
            with SuppressAndLog(assisted_service_client.rest.ApiException,
                                KeyboardInterrupt):
                client.download_and_save_file(
                    cluster["id"], cluster_file,
                    os.path.join(output_folder, "cluster_files", cluster_file))

        with SuppressAndLog(assisted_service_client.rest.ApiException,
                            KeyboardInterrupt):
            download_manifests(client, cluster["id"], output_folder)

        # Infra-env events are downloaded once per infra-env even when
        # several hosts share it.
        seen_infra_envs = set()
        for host in cluster["hosts"]:
            host_id, infra_env_id = host["id"], host["infra_env_id"]
            with SuppressAndLog(assisted_service_client.rest.ApiException,
                                KeyboardInterrupt):
                client.download_host_ignition(
                    infra_env_id, host_id,
                    os.path.join(output_folder, "cluster_files"))
            if infra_env_id not in seen_infra_envs:
                seen_infra_envs.add(infra_env_id)
                with SuppressAndLog(assisted_service_client.rest.ApiException,
                                    KeyboardInterrupt):
                    client.download_infraenv_events(
                        infra_env_id,
                        get_infraenv_events_path(infra_env_id, output_folder))

        with SuppressAndLog(assisted_service_client.rest.ApiException,
                            KeyboardInterrupt):
            client.download_cluster_events(
                cluster["id"], get_cluster_events_path(cluster, output_folder))
            shutil.copy2(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             "events.html"), output_folder)

        with SuppressAndLog(assisted_service_client.rest.ApiException,
                            KeyboardInterrupt):
            are_masters_in_configuring_state = are_host_progress_in_stage(
                cluster["hosts"], [HostsProgressStages.CONFIGURING], 2)
            are_masters_in_join_or_done_state = are_host_progress_in_stage(
                cluster["hosts"],
                [HostsProgressStages.JOINED, HostsProgressStages.DONE], 2)
            max_retries = (MUST_GATHER_MAX_RETRIES
                           if are_masters_in_join_or_done_state
                           else MAX_RETRIES)
            is_controller_expected = cluster[
                "status"] == ClusterStatus.INSTALLED or are_masters_in_configuring_state
            min_number_of_logs = min_number_of_log_files(
                cluster, is_controller_expected)

            # The tar path does not change between attempts.
            cluster_logs_tar = os.path.join(
                output_folder, f"cluster_{cluster['id']}_logs.tar")

            for i in range(max_retries):
                # Start every attempt from a clean slate.
                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)

                client.download_cluster_logs(cluster["id"], cluster_logs_tar)
                try:
                    verify_logs_uploaded(
                        cluster_logs_tar,
                        min_number_of_logs,
                        installation_success=(
                            cluster["status"] == ClusterStatus.INSTALLED),
                        check_oc=are_masters_in_join_or_done_state,
                    )
                    break
                except AssertionError as ex:
                    log.warning("Cluster logs verification failed: %s", ex)

                    # Skip sleeping on last retry. The bound must be the
                    # number of iterations this loop actually performs, which
                    # is ``max_retries`` and not necessarily ``MAX_RETRIES``.
                    if i < max_retries - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)

        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

        # must-gather sits inside the same suppression block, so it is skipped
        # when the kubeconfig download raises an ApiException.
        with SuppressAndLog(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster["id"],
                                                  kubeconfig_path)

            if must_gather:
                config_etc_hosts(
                    cluster["name"],
                    cluster["base_dns_domain"],
                    client.get_api_vip(cluster, cluster["id"]),
                )
                download_must_gather(kubeconfig_path, output_folder)

    finally:
        # Make whatever was downloaded readable and traversable by everyone,
        # even when one of the steps above raised.
        run_command(f"chmod -R ugo+rx '{output_folder}'")