def validate_project_exists(self, pvc_obj):
        """
        This function checks whether the new project exists in the
        EFK stack
        """
        pod_list = get_all_pods(namespace='openshift-logging')
        elasticsearch_pod = [
            pod.name for pod in pod_list if pod.name.startswith('elasticsearch')
        ]
        elasticsearch_pod_obj = get_pod_obj(
            name=elasticsearch_pod[1], namespace='openshift-logging'
        )
        project_index = elasticsearch_pod_obj.exec_cmd_on_pod(
            command='indices', out_yaml_format=False
        )
        project = pvc_obj.project.namespace

        if project in project_index:
            logger.info(f'The project {project} exists in the EFK stack')
            for item in project_index.split("\n"):
                if project in item:
                    logger.info(item.strip())
                    assert 'green' in item.strip(), f"Project {project} is Unhealthy"
        else:
            raise ModuleNotFoundError
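A minimal usage sketch of the validator above (hypothetical test body; the fixture name mirrors the one used in Example #8 below and the owning class is assumed):

    def test_new_project_is_indexed(self, create_pvc_and_deploymentconfig_pod):
        # Hypothetical test: get the app pod and PVC from the fixture, run some
        # IO, then confirm the new project shows up in the EFK indices
        pod_obj, pvc_obj = create_pvc_and_deploymentconfig_pod
        pod_obj.run_io(storage_type='fs', size=8000)
        self.validate_project_exists(pvc_obj)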
Example #2
def validate_image_exists(app="redis"):
    """
    Validate image exists on registries path
    Args:
        app (str): Label or application name

    Returns:
        image_list (str): Dirs/files/images listed in string format

    Raises:
        Exception: if the dirs/folders are not found
    """
    if not config.DEPLOYMENT.get("disconnected"):
        pod_list = get_pod_name_by_pattern(
            pattern="image-registry",
            namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE,
        )
        for pod_name in pod_list:
            if "cluster" not in pod_name:
                pod_obj = pod.get_pod_obj(
                    name=pod_name,
                    namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE,
                )

                return pod_obj.exec_cmd_on_pod(
                    command=f"find /registry/docker/registry/v2/repositories/openshift/{app}"
                )
Example #3
def validate_image_exists(namespace=None):
    """
    Validate image exists on registries path

    Args:
        namespace (str): Namespace where the images/builds are created

    Returns:
        image_list (str): Dirs/files/images listed in string format

    Raises:
        Exception: if the dirs/folders are not found

    """

    if not config.DEPLOYMENT.get('disconnected'):
        pod_list = get_pod_name_by_pattern(
            pattern="image-registry",
            namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE)
        for pod_name in pod_list:
            if "cluster" not in pod_name:
                pod_obj = pod.get_pod_obj(
                    name=pod_name,
                    namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE)
                return pod_obj.exec_cmd_on_pod(
                    command=f"find /registry/docker/registry/v2/repositories/{namespace}"
                )
Example #4
    def get_node_name_where_jenkins_pod_not_hosted(
        self, node_type=constants.WORKER_MACHINE, num_of_nodes=1
    ):
        """
        Get names of nodes that are not hosting a Jenkins pod

        Args:
            node_type (str): The node type (e.g. worker, master)
            num_of_nodes (int): The number of nodes to be returned

        Returns:
            list: List of node names
        """
        if node_type == constants.MASTER_MACHINE:
            nodes_drain = [node.name for node in get_typed_nodes(
                node_type=node_type, num_of_nodes=num_of_nodes
            )]
        elif node_type == constants.WORKER_MACHINE:
            pod_objs = []
            for project in self.projects:
                pod_names = get_pod_name_by_pattern(
                    pattern='jenkins', namespace=project
                )
                pod_obj = [get_pod_obj(name=pod_name, namespace=project) for pod_name in pod_names]
                pod_objs += pod_obj
            nodes_app_name = set(get_app_pod_running_nodes(pod_objs))
            nodes_worker_name = set(get_worker_nodes())
            nodes_drain = nodes_worker_name - nodes_app_name
        else:
            raise ValueError("node_type must be 'worker' or 'master'")
        return list(nodes_drain)[:num_of_nodes]
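A hedged usage sketch (the `jenkins` workload object owning this method is an assumption):

from ocs_ci.ocs import constants

# Pick one worker node that is not hosting any Jenkins pod, e.g. as a
# candidate for a drain or restart scenario
candidate_nodes = jenkins.get_node_name_where_jenkins_pod_not_hosted(
    node_type=constants.WORKER_MACHINE, num_of_nodes=1
)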
Example #5
def check_health_of_clusterlogging():
    """
    * Checks for the Elasticsearch, curator, fluentd and kibana pods in
      the openshift-logging namespace

    * Checks the health of cluster logging: if the status is green the
      cluster is healthy, if the status is red it is not

    Returns:
        list: Names of all the pods present in the namespace

    """

    pod_list = []
    pods = get_all_pods(namespace='openshift-logging')
    logger.info("Pods that are created by the instance")
    for pod in pods:
        pod_list.append(pod.name)
    logger.info(pod_list)
    elasticsearch_pod = [
        pod for pod in pod_list if pod.startswith('elasticsearch')
    ]
    logger.info(elasticsearch_pod)
    pod_obj = get_pod_obj(name=elasticsearch_pod[0],
                          namespace='openshift-logging')
    status_check = pod_obj.exec_cmd_on_pod(
        command='es_util --query=_cluster/health?pretty',
        out_yaml_format=False)
    logger.info(status_check)
    status_check = json.loads(status_check)
    if status_check['status'] == 'green':
        logger.info("Cluster logging is in Healthy state & Ready to use")
    else:
        logger.error("Cluster logging is in Bad state")
    return pod_list
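A minimal usage sketch (the pod-name prefix check is illustrative only):

logging_pods = check_health_of_clusterlogging()
# Illustrative sanity check on the returned pod names
assert any(name.startswith('elasticsearch') for name in logging_pods)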
Example #6
    def validate_messages_are_produced(
        self, namespace=constants.AMQ_NAMESPACE, value="10000", since_time=1800
    ):
        """
        Validates if all messages are sent in producer pod

        Args:
            namespace (str): Namespace of the pod
            value (str): Number of messages expected to be sent
            since_time (int): Number of seconds to look back for sent messages

        Raises an exception on failure

        """
        # ToDo: Support multiple topics and users
        producer_pod_objs = [
            get_pod_obj(pod)
            for pod in get_pod_name_by_pattern("hello-world-produce", namespace)
        ]
        for pod in producer_pod_objs:
            for msg in TimeoutSampler(
                900, 30, self.validate_msg, pod.name, namespace, value, since_time
            ):
                if msg:
                    break
        assert msg, "Few messages are not sent by producer"
        log.info("Producer sent all messages")
Example #7
    def get_new_pods(self, pod_list):
        """
        Fetches info about the respun pods in the cluster

        Args:
            pod_list (list): list of previous pod objects

        Returns:
            list : list of respun pod objects
        """
        new_pods = []
        for pod_obj in pod_list:
            if any(substr in pod_obj.name for substr in ['mon', 'osd']):
                pod_label = pod_obj.labels.get('pod-template-hash')
                label_selector = f'pod-template-hash={pod_label}'
            else:
                pod_label = pod_obj.labels.get('deploymentconfig')
                label_selector = f'deploymentconfig={pod_label}'

            pods_data = pod.get_pods_having_label(
                label_selector, pod_obj.namespace
            )
            for pod_data in pods_data:
                pod_name = pod_data.get('metadata').get('name')
                if '-deploy' not in pod_name and pod_name not in pod_obj.name:
                    new_pods.append(
                        pod.get_pod_obj(pod_name, pod_obj.namespace)
                    )
        logger.info(
            f"Previous pods: {[pod_obj.name for pod_obj in pod_list]}"
        )
        logger.info(
            f"Respun pods: {[pod_obj.name for pod_obj in new_pods]}"
        )
        return new_pods
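A hedged sketch of how this could be used around a pod respin (hypothetical method on the same class; the delete-and-wait flow is an assumption):

    def respin_and_get_new_pods(self, pod_list):
        # Delete the selected pods so their controllers respin them, then
        # fetch the freshly created replacements
        for pod_obj in pod_list:
            pod_obj.delete(wait=True)
        return self.get_new_pods(pod_list)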
Example #8
    def test_create_new_project_to_verify_logging(
            self, create_pvc_and_deploymentconfig_pod):
        """
        This function creates new project to verify logging in EFK stack
        1. Creates new project
        2. Creates PVC
        3. Creates Deployment pod in the new_project and run-io on the app pod
        4. Logs into the EFK stack and check for new_project
        5. And checks for the file_count in the new_project in EFK stack
        """

        pod_obj, pvc_obj = create_pvc_and_deploymentconfig_pod

        # Running IO on the app_pod
        pod_obj.run_io(storage_type='fs', size=8000)

        # Searching for new_project in EFK stack
        pod_list = get_all_pods(namespace='openshift-logging')
        elasticsearch_pod = [
            pod.name for pod in pod_list
            if pod.name.startswith('elasticsearch')
        ]
        elasticsearch_pod_obj = get_pod_obj(name=elasticsearch_pod[1],
                                            namespace='openshift-logging')
        projects = elasticsearch_pod_obj.exec_cmd_on_pod(
            command='indices | grep project', out_yaml_format=True)
        logger.info(projects)
        if pvc_obj.project.namespace in projects:
            logger.info("The new project exists in the EFK stack")
        else:
            raise ModuleNotFoundError
Example #9
    def validate_messages_are_consumed(
        self, namespace=constants.AMQ_NAMESPACE, value="10000", since_time=1800
    ):
        """
        Validates if all messages are received in consumer pod

        Args:
            namespace (str): Namespace of the pod
            value (str): Number of messages expected to be received
            since_time (int): Number of seconds to look back for received messages

        Raises an exception on failure

        """
        # ToDo: Support multiple topics and users
        consumer_pod_objs = [
            get_pod_obj(pod)
            for pod in get_pod_name_by_pattern("hello-world-consumer", namespace)
        ]
        for pod in consumer_pod_objs:
            for msg in TimeoutSampler(
                900, 30, self.validate_msg, pod.name, namespace, value, since_time
            ):
                if msg:
                    break
        assert msg, "Consumer didn't receive all messages"
        log.info("Consumer received all messages")
Example #10
    def get_couchbase_nodes(self):
        """
        Get nodes that contain a couchbase app pod

        Returns:
            list: List of nodes

        """
        app_pods_list = get_pod_name_by_pattern(
            "cb-example", constants.COUCHBASE_OPERATOR
        )
        app_pod_objs = list()
        for pod in app_pods_list:
            app_pod_objs.append(
                get_pod_obj(pod, namespace=constants.COUCHBASE_OPERATOR)
            )

        log.info("Create a list of nodes that contain a couchbase app pod")
        nodes_set = set()
        for pod in app_pod_objs:
            logging.info(
                f"pod {pod.name} located on "
                f"node {pod.get().get('spec').get('nodeName')}"
            )
            nodes_set.add(pod.get().get("spec").get("nodeName"))
        return list(nodes_set)
Example #11
def noobaa_running_node_restart(pod_name):
    """
    Function to restart node which has noobaa pod's running

    Args:
        pod_name (str): Name of noobaa pod

    """

    nb_pod_obj = pod.get_pod_obj(
        (get_pod_name_by_pattern(
            pattern=pod_name,
            namespace=constants.OPENSHIFT_STORAGE_NAMESPACE))[0],
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    nb_node_name = pod.get_pod_node(nb_pod_obj).name
    factory = platform_nodes.PlatformNodesFactory()
    nodes = factory.get_nodes_platform()
    nb_nodes = get_node_objs(node_names=nb_node_name)
    log.info(f"{pod_name} is running on {nb_node_name}")
    log.info(f"Restating node: {nb_node_name}....")
    nodes.restart_nodes_by_stop_and_start(nodes=nb_nodes, force=True)

    # Validate nodes are up and running
    wait_for_nodes_status()
    ceph_health_check(tries=30, delay=60)
    helpers.wait_for_resource_state(nb_pod_obj,
                                    constants.STATUS_RUNNING,
                                    timeout=180)
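A minimal usage sketch (the pod-name pattern is illustrative):

# Restart the node hosting the noobaa core pod and wait for it to recover
noobaa_running_node_restart(pod_name="noobaa-core")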
Example #12
    def get_new_pods(self, pod_list):
        """
        Fetches info about the respun pods in the cluster

        Args:
            pod_list (list): list of previous pod objects

        Returns:
            list : list of respun pod objects

        """
        new_pods = []
        for pod_obj in pod_list:
            if any(substr in pod_obj.name for substr in ["mon", "osd"]):
                pod_label = pod_obj.labels.get("pod-template-hash")
                label_selector = f"pod-template-hash={pod_label}"
            else:
                pod_label = pod_obj.labels.get("deploymentconfig")
                label_selector = f"deploymentconfig={pod_label}"

            pods_data = pod.get_pods_having_label(label_selector,
                                                  pod_obj.namespace)
            for pod_data in pods_data:
                pod_name = pod_data.get("metadata").get("name")
                if "-deploy" not in pod_name and pod_name not in pod_obj.name:
                    new_pods.append(
                        pod.get_pod_obj(pod_name, pod_obj.namespace))
        logger.info(f"Previous pods: {[pod_obj.name for pod_obj in pod_list]}")
        logger.info(f"Respun pods: {[pod_obj.name for pod_obj in new_pods]}")
        return new_pods
Example #13
    def test_delete_rook_ceph_mon_pod(self):
        for i in range(5):
            rook_operator_pod = pod.get_ocs_operator_pod(
                ocs_label=constants.OPERATOR_LABEL,
                namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
            )
            assert rook_operator_pod, "No rook operator pod found"
            log.info(
                f"Found rook-operator pod {rook_operator_pod.name}. Deleting it."
            )

            operator_deleted = rook_operator_pod.delete(wait=False)
            assert operator_deleted, f"Failed to delete pod {rook_operator_pod.name}"
            try:
                for pod_list in TimeoutSampler(
                        30,
                        1,
                        pod.get_pods_having_label,
                        constants.ROOK_CEPH_DETECT_VERSION_LABEL,
                        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
                ):
                    if len(pod_list) > 0:
                        self.rook_detect_pod_name = (
                            pod_list[0].get("metadata").get("name"))
                        self.rook_detect_pod_obj = pod.get_pod_obj(
                            self.rook_detect_pod_name,
                            constants.OPENSHIFT_STORAGE_NAMESPACE,
                        )
                        break
            except TimeoutExpiredError:
                assert True, "rook-ceph-detect-version pod not found"

            log.info(
                f"Found rook-ceph-detect-version pod {self.rook_detect_pod_name}. Deleting it"
            )
            rook_detect_deleted = self.rook_detect_pod_obj.delete(wait=True)
            assert (rook_detect_deleted
                    ), f"Failed to delete pod {self.rook_detect_pod_name}"
            self.rook_detect_pod_obj.ocp.wait_for_delete(
                self.rook_detect_pod_name)

        # Make sure there's no detect-version pod leftover
        try:
            for pod_list in TimeoutSampler(
                    30,
                    1,
                    pod.get_pods_having_label,
                    constants.ROOK_CEPH_DETECT_VERSION_LABEL,
                    namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
            ):
                if len(pod_list) == 0:
                    break
                else:
                    log.info(
                        f"Pod {pod_list[0].get('metadata').get('name')} found. waiting for it to be deleted"
                    )
        except TimeoutExpiredError:
            assert True, "rook-ceph-detect-version pod still exists"
Example #14
    def get_pgbench_pods(self):
        """
        Get all pgbench pods

        Returns:
            List: pgbench pod objects list

        """
        return [
            get_pod_obj(pod, RIPSAW_NAMESPACE)
            for pod in get_pod_name_by_pattern("pgbench", RIPSAW_NAMESPACE)
        ]
Example #15
def validate_pvc_are_mounted_on_monitoring_pods(pod_list):
    """
    Validate created pvc are mounted on monitoring pods

    Args:
        pod_list (list): List of the pods where pvc are mounted
    """
    for pod in pod_list:
        pod_obj = get_pod_obj(
            name=pod, namespace='openshift-monitoring'
        )
        mount_point = pod_obj.exec_cmd_on_pod(command="df -kh")
        assert "/dev/rbd" in mount_point, f"pvc is not mounted on pod {pod}"
    logger.info("Verified all pvc are mounted on monitoring pods")
Example #16
def validate_pvc_are_mounted_on_monitoring_pods(pod_list):
    """
    Validate created pvc are mounted on monitoring pods

    Args:
        pod_list (list): List of the pods where pvc are mounted

    """
    for pod in pod_list:
        pod_obj = get_pod_obj(name=pod.name,
                              namespace=defaults.OCS_MONITORING_NAMESPACE)
        mount_point = pod_obj.exec_cmd_on_pod(command="df -kh")
        assert "/dev/rbd" in mount_point, f"pvc is not mounted on pod {pod.name}"
    logger.info("Verified all pvc are mounted on monitoring pods")
Example #17
    def get_jenkins_deploy_pods(self, namespace):
        """
        Get all jenkins deploy pods

        Args:
            namespace (str): get pods in namespace

        Returns:
            pod_objs (list): jenkins deploy pod objects list

        """
        return [
            get_pod_obj(pod, namespace=namespace)
            for pod in get_pod_name_by_pattern('deploy', namespace=namespace)
        ]
Example #18
    def get_pgbench_status(self, pgbench_pod_name):
        """
        Get pgbench status

        Args:
            pgbench_pod_name (str): Name of the pgbench pod

        Returns:
            str: state of pgbench pod (running/completed)

        """
        pod_obj = get_pod_obj(pgbench_pod_name, namespace=RIPSAW_NAMESPACE)
        status = pod_obj.get().get('status').get('containerStatuses')[0].get(
            'state')

        return 'running' if list(
            status.keys())[0] == 'running' else status['terminated']['reason']
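A hedged sketch combining this with get_pgbench_pods (Example #15) to wait for a run to finish; the polling values, the 'running' state check, and the availability of TimeoutSampler (as used in Example #6) are assumptions:

    def wait_for_pgbench_run(self, timeout=3600, sleep=30):
        # Poll all pgbench pods until none of them report the 'running' state
        for pgbench_pods in TimeoutSampler(timeout, sleep, self.get_pgbench_pods):
            states = [self.get_pgbench_status(p.name) for p in pgbench_pods]
            if pgbench_pods and all(state != 'running' for state in states):
                return states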
Example #19
    def get_pgbench_status(self, pgbench_pod_name):
        """
        Get pgbench status

        Args:
            pgbench_pod_name (str): Name of the pgbench pod

        Returns:
            str: state of pgbench pod (running/completed)

        """
        pod_obj = get_pod_obj(pgbench_pod_name, namespace=RIPSAW_NAMESPACE)
        status = pod_obj.get().get("status").get("containerStatuses")[0].get(
            "state")

        return ("running" if list(status.keys())[0] == "running" else
                status["terminated"]["reason"])
Example #20
    def respin_couchbase_app_pod(self):
        """
        Respin the couchbase app pod

        Returns:
            pod status

        """
        app_pod_list = get_pod_name_by_pattern('cb-example',
                                               constants.COUCHBASE_OPERATOR)
        app_pod = random.choice(app_pod_list)
        logging.info(f"respin pod {app_pod}")
        app_pod_obj = get_pod_obj(app_pod,
                                  namespace=constants.COUCHBASE_OPERATOR)
        app_pod_obj.delete(wait=True, force=False)
        wait_for_resource_state(resource=app_pod_obj,
                                state=constants.STATUS_RUNNING,
                                timeout=300)
Example #21
    def validate_project_exists(self, pvc_obj):
        """
        This function checks whether the new project exists in the
        EFK stack
        """

        pod_list = get_all_pods(namespace='openshift-logging')
        elasticsearch_pod = [
            pod.name for pod in pod_list
            if pod.name.startswith('elasticsearch')
        ]
        elasticsearch_pod_obj = get_pod_obj(name=elasticsearch_pod[1],
                                            namespace='openshift-logging')
        projects = elasticsearch_pod_obj.exec_cmd_on_pod(
            command='indices | grep project', out_yaml_format=True)
        logger.info(projects)
        if pvc_obj.project.namespace in projects:
            logger.info("The new project exists in the EFK stack")
        else:
            raise ModuleNotFoundError
Example #22
    def run_amq_workload(self, command, benchmark_pod_name, tiller_namespace, timeout):
        """
        Runs amq workload in bg

        Args:
             command (str): Command to run on pod
             benchmark_pod_name (str): Pod name
             tiller_namespace (str): Namespace of pod
             timeout (int): Time to complete the run

        Returns:
            result (str): Returns benchmark run information

        """
        pod_obj = get_pod_obj(
            name=f"{benchmark_pod_name}-driver", namespace=tiller_namespace
        )
        return pod_obj.exec_cmd_on_pod(
            command=command, out_yaml_format=False, timeout=timeout
        )
Example #23
    def _cosbench_cli(self, workload):
        """
        Runs Cosbench cli to initiate workload

        Args:
            workload (str): Workload file

        """
        submit_key = "Accepted with ID"
        cobench_pod_obj = get_pod_obj(name=self.cosbench_pod.name,
                                      namespace=self.namespace)
        submit = cobench_pod_obj.exec_cmd_on_pod(
            command=f"/cos/cli.sh submit /cos/{workload}",
            out_yaml_format=True,
            timeout=180,
        )
        if submit_key in submit.keys():
            self.workload_id = submit[submit_key]
        else:
            assert f"Failed to submit the workload, ID not found. stdout: {submit}"
Example #24
    def test_delete_local_volume_sym_link(self):
        """
        Delete sym link on LSO Cluster
        """
        # Get rook-ceph-crashcollector pod objects
        crashcollector_pods = get_pod_name_by_pattern(
            pattern="rook-ceph-crashcollector",
            namespace=ROOK_CLUSTER_NAMESPACE)
        crashcollector_pods_objs = []
        for crashcollector_pod in crashcollector_pods:
            crashcollector_pods_objs.append(
                get_pod_obj(name=crashcollector_pod,
                            namespace=ROOK_CLUSTER_NAMESPACE))

        # Get Node object
        node_obj = get_pod_node(pod_obj=crashcollector_pods_objs[0])

        # Get Sym link
        osd_pvcs = get_deviceset_pvcs()
        pv_name = osd_pvcs[0].data["spec"]["volumeName"]
        ocp_obj = ocp.OCP(namespace=ROOK_CLUSTER_NAMESPACE, kind=constants.PV)
        pv_obj = ocp_obj.get(resource_name=pv_name)
        path = pv_obj["spec"]["local"]["path"]

        log.info("Delete sym link")
        oc_cmd = ocp.OCP(namespace=ROOK_CLUSTER_NAMESPACE)
        cmd = f"rm -rfv {path}"
        oc_cmd.exec_oc_debug_cmd(node=node_obj.name, cmd_list=[cmd])

        log.info(
            "Waiting for rook-ceph-crashcollector pods to be reach Running state"
        )
        for crashcollector_pods_obj in crashcollector_pods_objs:
            wait_for_resource_state(resource=crashcollector_pods_obj,
                                    state=constants.STATUS_RUNNING)

        # Check all OCS pods status, they should be in Running or Completed state
        wait_for_storage_pods()

        # Check ceph status
        ceph_health_check(namespace=config.ENV_DATA["cluster_namespace"])
Example #25
    def workloads_dir_setup(self, request):
        """
        Setting up the environment for the test

        """
        if config.DEPLOYMENT.get("local_storage"):
            self.worker_node = node.get_worker_nodes()[0]
            self.oc_cmd = OCP(namespace=defaults.ROOK_CLUSTER_NAMESPACE)
            mon_pod_name = self.oc_cmd.exec_oc_debug_cmd(
                node=self.worker_node,
                cmd_list=["ls /var/lib/rook/ | grep mon"],
            )
            mon_pod_id = mon_pod_name.split("-")[1].replace("\n", "")

            mon_pods_info = pod.get_pods_having_label(
                label=f"ceph_daemon_id={mon_pod_id}",
                namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            )
            self.mon_pod = pod.get_pod_obj(
                name=mon_pods_info[0]["metadata"]["name"],
                namespace=defaults.ROOK_CLUSTER_NAMESPACE,
            )
        else:
            self.mon_pod = random.choice(pod.get_mon_pods())
        self.mon_suffix = self.mon_pod.get().get("metadata").get("labels").get(
            "mon")

        self.workloads_dir = f"/var/lib/ceph/mon/ceph-{self.mon_suffix}/workloads"
        log.info(f"Selected mon '{self.mon_pod.name}'")
        self.mon_pod.exec_cmd_on_pod(f"mkdir {self.workloads_dir}")
        self.mon_pod.exec_cmd_on_pod(f"touch {self.workloads_dir}/{TEMP_FILE}")

        def finalizer():
            self.mon_pod.exec_cmd_on_pod(f"rm -rf {self.workloads_dir}")
            time.sleep(SLEEP_TIMEOUT)
            utils.ceph_health_check()

        request.addfinalizer(finalizer)
Example #26
def get_helper_pods_output():
    """
    Get the output of "oc describe mg-helper pods"

    Returns:
        str: the output of "oc describe pods mg-helper" and "oc logs mg-helper"

    """
    from ocs_ci.ocs.resources.pod import get_pod_obj, get_pod_logs

    output_describe_mg_helper = ""
    helper_pods = get_pod_name_by_pattern(pattern="helper")
    for helper_pod in helper_pods:
        try:
            helper_pod_obj = get_pod_obj(
                name=helper_pod,
                namespace=constants.OPENSHIFT_STORAGE_NAMESPACE)
            output_describe_mg_helper += (
                f"****helper pod {helper_pod} describe****\n{helper_pod_obj.describe()}\n"
                f"****helper pod {helper_pod} logs***\n{get_pod_logs(pod_name=helper_pod)}"
            )
        except Exception as e:
            log.error(e)
    return output_describe_mg_helper
Example #27
def get_spun_dc_pods(pod_list):
    """
    Fetches info about the re-spun dc pods

    Args:
        pod_list (list): list of previous pod objects

    Returns:
        list : list of respun pod objects

    """
    new_pods = []
    for pod_obj in pod_list:
        pod_label = pod_obj.labels.get("deploymentconfig")
        label_selector = f"deploymentconfig={pod_label}"

        pods_data = pod.get_pods_having_label(label_selector, pod_obj.namespace)
        for pod_data in pods_data:
            pod_name = pod_data.get("metadata").get("name")
            if "-deploy" not in pod_name and pod_name not in pod_obj.name:
                new_pods.append(pod.get_pod_obj(pod_name, pod_obj.namespace))
    logger.info(f"Previous pods: {[pod_obj.name for pod_obj in pod_list]}")
    logger.info(f"Re-spun pods: {[pod_obj.name for pod_obj in new_pods]}")
    return new_pods
Example #28
    def test_recovery_from_volume_deletion(self, nodes, pvc_factory,
                                           pod_factory):
        """
        Test cluster recovery from disk deletion from the platform side.
        Based on documented procedure detailed in
        https://bugzilla.redhat.com/show_bug.cgi?id=1823183

        """
        logger.info("Picking a PV which to be deleted from the platform side")
        osd_pvs = get_deviceset_pvs()
        osd_pv = random.choice(osd_pvs)
        osd_pv_name = osd_pv.name
        # get the claim name
        logger.info(f"Getting the claim name for OSD PV {osd_pv_name}")
        claim_name = osd_pv.get().get("spec").get("claimRef").get("name")

        # Get the backing volume name
        logger.info(f"Getting the backing volume name for PV {osd_pv_name}")
        backing_volume = nodes.get_data_volumes(pvs=[osd_pv])[0]

        # Get the corresponding PVC
        logger.info(f"Getting the corresponding PVC of PV {osd_pv_name}")
        osd_pvcs = get_deviceset_pvcs()
        osd_pvcs_count = len(osd_pvcs)
        osd_pvc = [
            ds for ds in osd_pvcs
            if ds.get().get("metadata").get("name") == claim_name
        ][0]

        # Get the corresponding OSD pod and ID
        logger.info(f"Getting the OSD pod using PVC {osd_pvc.name}")
        osd_pods = get_osd_pods()
        osd_pods_count = len(osd_pods)
        osd_pod = [
            osd_pod for osd_pod in osd_pods
            if osd_pod.get().get("metadata").get("labels").get(
                constants.CEPH_ROOK_IO_PVC_LABEL) == claim_name
        ][0]
        logger.info(f"OSD_POD {osd_pod.name}")
        osd_id = osd_pod.get().get("metadata").get("labels").get("ceph-osd-id")

        # Get the node that has the OSD pod running on
        logger.info(
            f"Getting the node that has the OSD pod {osd_pod.name} running on")
        osd_node = get_pod_node(osd_pod)
        osd_prepare_pods = get_osd_prepare_pods()
        osd_prepare_pod = [
            pod for pod in osd_prepare_pods if pod.get().get("metadata").get(
                "labels").get(constants.CEPH_ROOK_IO_PVC_LABEL) == claim_name
        ][0]
        osd_prepare_job_name = (osd_prepare_pod.get().get("metadata").get(
            "labels").get("job-name"))
        osd_prepare_job = get_job_obj(osd_prepare_job_name)

        # Get the corresponding OSD deployment
        logger.info(f"Getting the OSD deployment for OSD PVC {claim_name}")
        osd_deployment = [
            osd_pod for osd_pod in get_osd_deployments()
            if osd_pod.get().get("metadata").get("labels").get(
                constants.CEPH_ROOK_IO_PVC_LABEL) == claim_name
        ][0]
        osd_deployment_name = osd_deployment.name

        # Delete the volume from the platform side
        logger.info(f"Deleting {backing_volume} from the platform side")
        nodes.detach_volume(backing_volume, osd_node)

        # Scale down OSD deployment
        logger.info(f"Scaling down OSD deployment {osd_deployment_name} to 0")
        ocp.OCP().exec_oc_cmd(
            f"scale --replicas=0 deployment/{osd_deployment_name}")

        # Force delete OSD pod if necessary
        osd_pod_name = osd_pod.name
        logger.info(f"Waiting for OSD pod {osd_pod.name} to get deleted")
        try:
            osd_pod.ocp.wait_for_delete(resource_name=osd_pod_name)
        except TimeoutError:
            osd_pod.delete(force=True)
            osd_pod.ocp.wait_for_delete(resource_name=osd_pod_name)

        # Run ocs-osd-removal job
        ocp_version = float(get_ocp_version())
        if ocp_version >= 4.6:
            cmd = f"process ocs-osd-removal -p FAILED_OSD_IDS={osd_id} -o yaml"
        else:
            cmd = f"process ocs-osd-removal -p FAILED_OSD_ID={osd_id} -o yaml"

        logger.info(f"Executing OSD removal job on OSD-{osd_id}")
        ocp_obj = ocp.OCP(namespace=config.ENV_DATA["cluster_namespace"])
        osd_removal_job_yaml = ocp_obj.exec_oc_cmd(cmd)
        osd_removal_job = OCS(**osd_removal_job_yaml)
        osd_removal_job.create(do_reload=False)

        # Get ocs-osd-removal pod name
        logger.info("Getting the ocs-osd-removal pod name")
        osd_removal_pod_name = get_osd_removal_pod_name(osd_id)
        osd_removal_pod_obj = get_pod_obj(osd_removal_pod_name,
                                          namespace="openshift-storage")
        osd_removal_pod_obj.ocp.wait_for_resource(
            condition=constants.STATUS_COMPLETED,
            resource_name=osd_removal_pod_name)

        # Verify OSD removal from the ocs-osd-removal pod logs
        logger.info(
            f"Verifying removal of OSD from {osd_removal_pod_name} pod logs")
        logs = get_pod_logs(osd_removal_pod_name)
        pattern = f"purged osd.{osd_id}"
        assert re.search(pattern, logs)

        osd_pvc_name = osd_pvc.name

        if ocp_version < 4.6:
            # Delete the OSD prepare job
            logger.info(f"Deleting OSD prepare job {osd_prepare_job_name}")
            osd_prepare_job.delete()
            osd_prepare_job.ocp.wait_for_delete(
                resource_name=osd_prepare_job_name, timeout=120)

            # Delete the OSD PVC
            logger.info(f"Deleting OSD PVC {osd_pvc_name}")
            osd_pvc.delete()
            osd_pvc.ocp.wait_for_delete(resource_name=osd_pvc_name)

            # Delete the OSD deployment
            logger.info(f"Deleting OSD deployment {osd_deployment_name}")
            osd_deployment.delete()
            osd_deployment.ocp.wait_for_delete(
                resource_name=osd_deployment_name, timeout=120)
        else:
            # If ocp version is '4.6' and above the osd removal job should
            # delete the OSD prepare job, OSD PVC, OSD deployment
            logger.info(
                f"Verifying deletion of OSD prepare job {osd_prepare_job_name}"
            )
            osd_prepare_job.ocp.wait_for_delete(
                resource_name=osd_prepare_job_name, timeout=30)
            logger.info(f"Verifying deletion of OSD PVC {osd_pvc_name}")
            osd_pvc.ocp.wait_for_delete(resource_name=osd_pvc_name, timeout=30)
            logger.info(
                f"Verifying deletion of OSD deployment {osd_deployment_name}")
            osd_deployment.ocp.wait_for_delete(
                resource_name=osd_deployment_name, timeout=30)

        # Delete PV
        logger.info(f"Verifying deletion of PV {osd_pv_name}")
        try:
            osd_pv.ocp.wait_for_delete(resource_name=osd_pv_name)
        except TimeoutError:
            osd_pv.delete()
            osd_pv.ocp.wait_for_delete(resource_name=osd_pv_name)

        if ocp_version < 4.6:
            # Delete the rook ceph operator pod to trigger reconciliation
            rook_operator_pod = get_operator_pods()[0]
            logger.info(
                f"deleting Rook Ceph operator pod {rook_operator_pod.name}")
            rook_operator_pod.delete()

        # Delete the OSD removal job
        logger.info(f"Deleting OSD removal job ocs-osd-removal-{osd_id}")
        osd_removal_job = get_job_obj(f"ocs-osd-removal-{osd_id}")
        osd_removal_job.delete()
        osd_removal_job.ocp.wait_for_delete(
            resource_name=f"ocs-osd-removal-{osd_id}")

        timeout = 600
        # Wait for OSD PVC to get created and reach Bound state
        logger.info(
            "Waiting for a new OSD PVC to get created and reach Bound state")
        assert osd_pvc.ocp.wait_for_resource(
            timeout=timeout,
            condition=constants.STATUS_BOUND,
            selector=constants.OSD_PVC_GENERIC_LABEL,
            resource_count=osd_pvcs_count,
        ), (f"Cluster recovery failed after {timeout} seconds. "
            f"Expected to have {osd_pvcs_count} OSD PVCs in status Bound. Current OSD PVCs status: "
            f"{[pvc.ocp.get_resource(pvc.get().get('metadata').get('name'), 'STATUS') for pvc in get_deviceset_pvcs()]}"
            )
        # Wait for OSD pod to get created and reach Running state
        logger.info(
            "Waiting for a new OSD pod to get created and reach Running state")
        assert osd_pod.ocp.wait_for_resource(
            timeout=timeout,
            condition=constants.STATUS_RUNNING,
            selector=constants.OSD_APP_LABEL,
            resource_count=osd_pods_count,
        ), (f"Cluster recovery failed after {timeout} seconds. "
            f"Expected to have {osd_pods_count} OSD pods in status Running. Current OSD pods status: "
            f"{[osd_pod.ocp.get_resource(pod.get().get('metadata').get('name'), 'STATUS') for pod in get_osd_pods()]}"
            )

        # We need to silence the old osd crash warning due to BZ https://bugzilla.redhat.com/show_bug.cgi?id=1896810
        # This is a workaround - issue for tracking: https://github.com/red-hat-storage/ocs-ci/issues/3438
        if ocp_version >= 4.6:
            silence_osd_crash = cluster.wait_for_silence_ceph_osd_crash_warning(
                osd_pod_name)
            if not silence_osd_crash:
                logger.info("Didn't find ceph osd crash warning")

        # Validate cluster is still functional
        self.sanity_helpers.health_check(tries=100)
        self.sanity_helpers.create_resources(pvc_factory, pod_factory)
Example #29
    def test_rgw_kafka_notifications(self, bucket_factory):
        """
        Test to verify rgw kafka notifications

        """
        # Get sc
        sc = default_storage_class(interface_type=constants.CEPHBLOCKPOOL)

        # Deploy amq cluster
        self.amq.setup_amq_cluster(sc.name)

        # Create topic
        self.kafka_topic = self.amq.create_kafka_topic()

        # Create Kafkadrop pod
        (
            self.kafkadrop_pod,
            self.kafkadrop_svc,
            self.kafkadrop_route,
        ) = self.amq.create_kafkadrop()

        # Get the kafkadrop route
        kafkadrop_host = self.kafkadrop_route.get().get("spec").get("host")

        # Create bucket
        bucketname = bucket_factory(amount=1, interface="RGW-OC")[0].name

        # Get RGW credentials
        rgw_obj = RGW()
        rgw_endpoint, access_key, secret_key = rgw_obj.get_credentials()

        # Clone notify repo
        notify_path = clone_notify()

        # Initialise to put objects
        data = "A random string data to write on created rgw bucket"
        obc_obj = OBC(bucketname)
        s3_resource = boto3.resource(
            "s3",
            verify=retrieve_verification_mode(),
            endpoint_url=rgw_endpoint,
            aws_access_key_id=obc_obj.access_key_id,
            aws_secret_access_key=obc_obj.access_key,
        )
        s3_client = s3_resource.meta.client

        # Initialize notify command to run
        notify_cmd = (
            f"python {notify_path} -e {rgw_endpoint} -a {obc_obj.access_key_id} "
            f"-s {obc_obj.access_key} -b {bucketname} -ke {constants.KAFKA_ENDPOINT} -t {self.kafka_topic.name}"
        )
        log.info(f"Running cmd {notify_cmd}")

        # Put objects to bucket
        assert s3_client.put_object(Bucket=bucketname, Key="key-1",
                                    Body=data), "Failed: Put object: key-1"
        exec_cmd(notify_cmd)

        # Validate rgw logs notification are sent
        # No errors are seen
        pattern = "ERROR: failed to create push endpoint"
        rgw_pod_obj = get_rgw_pods()
        rgw_log = get_pod_logs(pod_name=rgw_pod_obj[0].name, container="rgw")
        assert re.search(pattern=pattern, string=rgw_log) is None, (
            f"Error: {pattern} msg found in the rgw logs."
            f"Validate {pattern} found on rgw logs and also "
            f"rgw bucket notification is working correctly")
        assert s3_client.put_object(Bucket=bucketname, Key="key-2",
                                    Body=data), "Failed: Put object: key-2"
        exec_cmd(notify_cmd)

        # Validate message are received Kafka side using curl command
        # A temporary way to check from Kafka side, need to check from UI
        curl_command = (
            f"curl -X GET {kafkadrop_host}/topic/{self.kafka_topic.name} "
            "-H 'content-type: application/vnd.kafka.json.v2+json'")
        json_output = run_cmd(cmd=curl_command)
        new_string = json_output.split()
        messages = new_string[new_string.index("messages</td>") + 1]
        if messages.find("1") == -1:
            raise Exception(
                "Error: Messages are not received from the Kafka side. "
                "RGW bucket notification is not working as expected.")

        # Validate the timestamp events
        ocs_version = config.ENV_DATA["ocs_version"]
        if Version.coerce(ocs_version) >= Version.coerce("4.8"):
            cmd = (
                f"bin/kafka-console-consumer.sh --bootstrap-server {constants.KAFKA_ENDPOINT} "
                f"--topic {self.kafka_topic.name} --from-beginning --timeout-ms 20000"
            )
            pod_list = get_pod_name_by_pattern(
                pattern="my-cluster-zookeeper",
                namespace=constants.AMQ_NAMESPACE)
            zookeeper_obj = get_pod_obj(name=pod_list[0],
                                        namespace=constants.AMQ_NAMESPACE)
            event_obj = zookeeper_obj.exec_cmd_on_pod(command=cmd)
            log.info(f"Event obj: {event_obj}")
            event_time = event_obj.get("Records")[0].get("eventTime")
            format_string = "%Y-%m-%dT%H:%M:%S.%fZ"
            try:
                datetime.strptime(event_time, format_string)
            except ValueError as ef:
                log.error(
                    f"Timestamp event {event_time} doesnt match the pattern {format_string}"
                )
                raise ef

            log.info(
                f"Timestamp event {event_time} matches the pattern {format_string}"
            )
Example #30
    def run_amq_benchmark(
        self,
        benchmark_pod_name="benchmark",
        kafka_namespace=constants.AMQ_NAMESPACE,
        tiller_namespace=AMQ_BENCHMARK_NAMESPACE,
        num_of_clients=8,
        worker=None,
        timeout=1800,
        amq_workload_yaml=None,
        run_in_bg=False,
    ):
        """
        Run benchmark pod and get the results

        Args:
            benchmark_pod_name (str): Name of the benchmark pod
            kafka_namespace (str): Namespace where kafka cluster created
            tiller_namespace (str): Namespace where tiller pod needs to be created
            num_of_clients (int): Number of clients to be created
            worker (str) : Loads to create on workloads separated with commas
                e.g http://benchmark-worker-0.benchmark-worker:8080,
                http://benchmark-worker-1.benchmark-worker:8080
            timeout (int): Time to complete the run
            amq_workload_yaml (dict): Contains amq workloads information keys and values
                :name (str): Name of the workloads
                :topics (int): Number of topics created
                :partitions_per_topic (int): Number of partitions per topic
                :message_size (int): Message size
                :payload_file (str): Load to run on workload
                :subscriptions_per_topic (int): Number of subscriptions per topic
                :consumer_per_subscription (int): Number of consumers per subscription
                :producers_per_topic (int): Number of producers per topic
                :producer_rate (int): Producer rate
                :consumer_backlog_sizegb (int): Size of block in gb
                :test_duration_minutes (int): Time to run the workloads
            run_in_bg (bool): On true the workload will run in background

        Return:
            result (str/Thread obj): Returns benchmark run information if run_in_bg is False.
                Otherwise a thread of the amq workload execution

        """

        # Namespace for helm/tiller
        try:
            self.create_namespace(tiller_namespace)
        except CommandFailed as ef:
            if (
                f'project.project.openshift.io "{tiller_namespace}" already exists'
                not in str(ef)
            ):
                raise ef

        # Create rbac file
        try:
            sa_tiller = list(
                templating.load_yaml(constants.AMQ_RBAC_YAML, multi_document=True)
            )
            sa_tiller[0]["metadata"]["namespace"] = tiller_namespace
            sa_tiller[1]["subjects"][0]["namespace"] = tiller_namespace
            self.sa_tiller = OCS(**sa_tiller[0])
            self.crb_tiller = OCS(**sa_tiller[1])
            self.sa_tiller.create()
            self.crb_tiller.create()
        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during creation of service account tiller")
            raise cf

        # Install helm cli (version v2.16.1 as we need the tiller component)
        # And create tiller pods
        wget_cmd = f"wget -c --read-timeout=5 --tries=0 {URL}"
        untar_cmd = "tar -zxvf helm-v2.16.1-linux-amd64.tar.gz"
        tiller_cmd = (
            f"linux-amd64/helm init --tiller-namespace {tiller_namespace}"
            f" --service-account {tiller_namespace}"
        )
        exec_cmd(cmd=wget_cmd, cwd=self.dir)
        exec_cmd(cmd=untar_cmd, cwd=self.dir)
        exec_cmd(cmd=tiller_cmd, cwd=self.dir)

        # Validate tiller pod is running
        log.info("Waiting for 30s for tiller pod to come up")
        time.sleep(30)
        if self.is_amq_pod_running(
            pod_pattern="tiller", expected_pods=1, namespace=tiller_namespace
        ):
            log.info("Tiller pod is running")
        else:
            raise ResourceWrongStatusException("Tiller pod is not in running state")

        # Create benchmark pods
        log.info("Create benchmark pods")
        values = templating.load_yaml(constants.AMQ_BENCHMARK_VALUE_YAML)
        values["numWorkers"] = num_of_clients
        benchmark_cmd = (
            f"linux-amd64/helm install {constants.AMQ_BENCHMARK_POD_YAML}"
            f" --name {benchmark_pod_name} --tiller-namespace {tiller_namespace}"
        )
        exec_cmd(cmd=benchmark_cmd, cwd=self.dir)

        # Making sure the benchmark pod and clients are running
        if self.is_amq_pod_running(
            pod_pattern="benchmark",
            expected_pods=(1 + num_of_clients),
            namespace=tiller_namespace,
        ):
            log.info("All benchmark pod is up and running")
        else:
            raise ResourceWrongStatusException(
                "Benchmark pod is not getting to running state"
            )

        # Update commonConfig with kafka-bootstrap server details
        driver_kafka = templating.load_yaml(constants.AMQ_DRIVER_KAFKA_YAML)
        driver_kafka[
            "commonConfig"
        ] = f"bootstrap.servers=my-cluster-kafka-bootstrap.{kafka_namespace}.svc.cluster.local:9092"
        json_file = f"{self.dir}/driver_kafka"
        templating.dump_data_to_json(driver_kafka, json_file)
        cmd = f"cp {json_file} {benchmark_pod_name}-driver:/"
        self.pod_obj.exec_oc_cmd(cmd)

        # Update the workload yaml
        if not amq_workload_yaml:
            amq_workload_yaml = templating.load_yaml(constants.AMQ_WORKLOAD_YAML)
        yaml_file = f"{self.dir}/amq_workload.yaml"
        templating.dump_data_to_temp_yaml(amq_workload_yaml, yaml_file)
        cmd = f"cp {yaml_file} {benchmark_pod_name}-driver:/"
        self.pod_obj.exec_oc_cmd(cmd)

        self.benchmark = True

        # Run the benchmark
        if worker:
            cmd = f"bin/benchmark --drivers /driver_kafka --workers {worker} /amq_workload.yaml"
        else:
            cmd = "bin/benchmark --drivers /driver_kafka /amq_workload.yaml"
        log.info(f"Run benchmark and running command {cmd} inside the benchmark pod ")

        if run_in_bg:
            executor = ThreadPoolExecutor(1)
            result = executor.submit(
                self.run_amq_workload,
                cmd,
                benchmark_pod_name,
                tiller_namespace,
                timeout,
            )
            return result

        pod_obj = get_pod_obj(
            name=f"{benchmark_pod_name}-driver", namespace=tiller_namespace
        )
        result = pod_obj.exec_cmd_on_pod(
            command=cmd, out_yaml_format=False, timeout=timeout
        )

        return result
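A hedged usage sketch of the background path (the `amq` object name and the follow-up validations are assumptions; with run_in_bg=True the method returns the future from ThreadPoolExecutor.submit):

# Kick off the benchmark in the background, run other validations meanwhile,
# then collect the benchmark output from the returned future
benchmark_future = amq.run_amq_benchmark(run_in_bg=True)
amq.validate_messages_are_produced()
amq.validate_messages_are_consumed()
benchmark_output = benchmark_future.result(timeout=1800)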