def _check_heketi_and_gluster_pod_after_node_reboot(self, heketi_node):
    """Verify heketi (and co-located glusterfs) pods recover after reboot.

    Switches to the storage project, waits until the heketi pod is ready
    and answering, and — when the heketi pod is hosted on one of the
    gluster servers — also waits for that node's glusterfs pod and checks
    its service states.
    """
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)

    # Heketi pod name is resolved from its deployment config.
    pod_of_heketi = openshift_ops.get_pod_names_from_dc(
        self._master, self.heketi_dc_name)[0]

    # Heketi pod must be ready and the heketi service responsive.
    openshift_ops.wait_for_pod_be_ready(self._master, pod_of_heketi)
    heketi_ops.hello_heketi(self._master, self.heketi_server_url)

    # Only when heketi landed on a gluster server does the glusterfs pod
    # on that node need to be re-checked as well.
    host_ip = openshift_ops.oc_get_custom_resource(
        self._master, 'pod', '.:status.hostIP', pod_of_heketi)[0]
    if host_ip not in self.gluster_servers:
        return

    pod_of_gluster = openshift_ops.get_gluster_pod_name_for_specific_node(
        self._master, heketi_node)
    openshift_ops.wait_for_pod_be_ready(self._master, pod_of_gluster)

    # Expected systemd state for each gluster service inside the pod
    # (dict preserves insertion order, so check order is unchanged).
    expected_states = {
        "glusterd": "running",
        "gluster-blockd": "running",
        "tcmu-runner": "running",
        "gluster-block-target": "exited",
    }
    for svc, state in expected_states.items():
        openshift_ops.check_service_status_on_pod(
            self._master, pod_of_gluster, svc, "active", state)
 def _wait_for_gluster_pod_after_node_reboot(self, node_hostname):
     """Wait for glusterfs pod to be ready after node reboot"""
     # The OCP node must report Ready before its pod can recover.
     openshift_ops.wait_for_ocp_node_be_ready(
         self.ocp_client, node_hostname)

     # Locate and wait for the glusterfs pod hosted on that node.
     pod_name = openshift_ops.get_gluster_pod_name_for_specific_node(
         self.ocp_client, node_hostname)
     openshift_ops.wait_for_pod_be_ready(self.ocp_client, pod_name)

     # Verify each gluster service reached its expected systemd state.
     for svc, expected_state in (
             ("glusterd", "running"),
             ("gluster-blockd", "running"),
             ("tcmu-runner", "running"),
             ("gluster-block-target", "exited")):
         openshift_ops.check_service_status_on_pod(
             self.ocp_client, pod_name, svc, "active", expected_state)
    def _node_reboot(self):
        """Reboot the first gluster server node and wait for it to return.

        Triggers a delayed reboot over ssh, closes the stale connection,
        polls until the node is reachable again, then waits for the
        glusterfs pod and its services to recover.
        """
        # Storage (data-plane) address of the first configured gluster server.
        storage_hostname = (g.config["gluster_servers"]
                            [self.gluster_servers[0]]["storage"])

        # Delay the shutdown slightly so the ssh command can return first.
        cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'"
        ret, out, err = g.run(storage_hostname, cmd)

        # Register cleanup up-front so teardown still waits for the pod
        # even if any of the assertions below fail.
        self.addCleanup(self._wait_for_gluster_pod_to_be_ready)

        # 255 is expected here — presumably the ssh session being killed by
        # the reboot; any other code means the shutdown command itself failed.
        if ret != 255:
            err_msg = "failed to reboot host %s error: %s" % (
                storage_hostname, err)
            g.log.error(err_msg)
            raise AssertionError(err_msg)

        # Drop the now-stale ssh connection; failure to close is fatal.
        try:
            g.ssh_close_connection(storage_hostname)
        except Exception as e:
            g.log.error("failed to close connection with host %s"
                        " with error: %s" % (storage_hostname, e))
            raise

        # added sleep as node will restart after 3 sec
        time.sleep(3)

        # Poll (up to 600s, every 10s) until an rpyc connection succeeds,
        # i.e. the node is back up and reachable.
        # NOTE(review): the user value looks scrubbed/masked ("******") —
        # confirm the intended username before running this.
        for w in Waiter(timeout=600, interval=10):
            try:
                if g.rpyc_get_connection(storage_hostname, user="******"):
                    g.rpyc_close_connection(storage_hostname, user="******")
                    break
            except Exception as err:
                g.log.info("exception while getting connection: '%s'" % err)

        if w.expired:
            error_msg = ("exceeded timeout 600 sec, node '%s' is "
                         "not reachable" % storage_hostname)
            g.log.error(error_msg)
            raise ExecutionError(error_msg)

        # wait for the gluster pod to be in 'Running' state
        self._wait_for_gluster_pod_to_be_ready()

        # glusterd and gluster-blockd service should be up and running
        # NOTE(review): this call passes 4 args ("running" as the state),
        # unlike sibling methods that pass ("active", state) — confirm the
        # helper's signature matches this usage.
        service_names = ("glusterd", "gluster-blockd", "tcmu-runner")
        for gluster_pod in self.gluster_pod_list:
            for service in service_names:
                g.log.info("gluster_pod - '%s' : gluster_service '%s'" % (
                    gluster_pod, service))
                check_service_status_on_pod(
                    self.oc_node, gluster_pod, service, "running"
                )
# Example #4
    def reboot_gluster_node_and_wait_for_services(self):
        """Reboot the first gluster server node and wait until its
        glusterfs pod is ready and gluster services are in the
        expected states again.
        """
        node_ip = (
            g.config["gluster_servers"][self.gluster_servers[0]]["storage"])

        # Find the glusterfs pod hosted on the node we are about to reboot.
        matching_pods = [
            pod for pod in get_ocp_gluster_pod_details(self.oc_node)
            if pod["pod_host_ip"] == node_ip]
        if not matching_pods:
            raise ExecutionError("Gluster pod Host IP '%s' not matched." %
                                 node_ip)
        pod_name = matching_pods[0]["pod_name"]

        # Register cleanup first so teardown still waits for the pod
        # even if a later assertion fails.
        self.addCleanup(wait_for_pod_be_ready, self.oc_node, pod_name)
        node_reboot_by_command(node_ip, timeout=600, wait_step=10)

        # wait for the gluster pod to be in 'Running' state
        wait_for_pod_be_ready(self.oc_node, pod_name)

        # Each gluster service must be active in its expected state.
        for service, state in (
                ("glusterd", "running"),
                ("gluster-blockd", "running"),
                ("tcmu-runner", "running"),
                ("gluster-block-target", "exited")):
            check_service_status_on_pod(
                self.oc_node, pod_name, service, "active", state)