def label_nodes(request):
    """
    Fixture to label the node(s) that will run the application pod.
    That will be all workers node that do not run the OCS cluster.
    """
    def teardown():
        log.info('Clear label form worker (Application) nodes')
        # Getting all Application nodes
        app_nodes = machine.get_labeled_nodes(constants.APP_NODE_LABEL)
        helpers.remove_label_from_worker_node(app_nodes,
                                              constants.APP_NODE_LABEL)

    request.addfinalizer(teardown)

    # Getting all OCS nodes (to verify app pod wil not run on)
    ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    # Add label to the worker nodes
    worker_nodes = helpers.get_worker_nodes()
    # Getting list of free nodes
    free_nodes = list(set(worker_nodes) - set(ocs_nodes))

    log.info('Adding the app-node label to Non-OCS workers')
    log.debug(f'The Workers nodes are : {worker_nodes}')
    log.debug(f'The OCS nodes are : {ocs_nodes}')
    log.debug(f'The free nodes are : {free_nodes}')

    assert free_nodes, \
        'Did not found any worker to run on, pleas deploy another worker'

    helpers.label_worker_node(free_nodes, constants.APP_NODE_LABEL,
                              constants.VDBENCH_NODE_LABEL)

    return
    def test_automated_recovery_from_failed_nodes_IPI_proactive(
            self, interface, pvc_factory, pod_factory, dc_pod_factory):
        """
        Knip-678 Automated recovery from failed nodes
        Proactive case - IPI
        """
        # Get OSD running nodes
        osd_running_nodes = get_osd_running_nodes()
        log.info(f"OSDs are running on nodes {osd_running_nodes}")
        # Label osd nodes with fedora app
        label_worker_node(osd_running_nodes,
                          label_key='dc',
                          label_value='fedora')

        # Create DC app pods
        log.info("Creating DC based app pods")
        interface = (constants.CEPHBLOCKPOOL
                     if interface == 'rbd' else constants.CEPHFILESYSTEM)
        dc_pod_obj = []
        for i in range(2):
            dc_pod = dc_pod_factory(interface=interface,
                                    node_selector={'dc': 'fedora'})
            pod.run_io_in_bg(dc_pod, fedora_dc=True)
            dc_pod_obj.append(dc_pod)

        # Get app pods running nodes
        dc_pod_node_name = get_app_pod_running_nodes(dc_pod_obj)
        log.info(f"DC app pod running nodes are {dc_pod_node_name}")

        # Get both osd and app pod running node
        common_nodes = get_both_osd_and_app_pod_running_node(
            osd_running_nodes, dc_pod_node_name)
        msg = "Common OSD and app running node(s) NOT found"
        assert (len(common_nodes) > 0), msg
        log.info(f"Common OSD and app pod running nodes are {common_nodes}")

        # Get the machine name using the node name
        machine_name = machine.get_machine_from_node_name(common_nodes[0])
        log.info(f"{common_nodes[0]} associated machine is {machine_name}")

        # Get the machineset name using machine name
        machineset_name = machine.get_machineset_from_machine_name(
            machine_name)
        log.info(
            f"{common_nodes[0]} associated machineset is {machineset_name}")

        # Add a new node and label it
        add_new_node_and_label_it(machineset_name)

        # Delete the machine
        machine.delete_machine(machine_name)
        log.info(f"Successfully deleted machine {machine_name}")

        # DC app pods on the failed node will get automatically created on
        # other running node. Waiting for all dc app pod to reach running
        # state
        pod.wait_for_dc_app_pods_to_reach_running_state(dc_pod_obj)
        log.info("All the dc pods reached running state")

        pod.wait_for_storage_pods()

        # Check basic cluster functionality by creating resources
        # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
        # run IO and delete the resources
        self.sanity_helpers.create_resources(pvc_factory, pod_factory)
        self.sanity_helpers.delete_resources()

        # Perform cluster and Ceph health checks
        self.sanity_helpers.health_check()
    def test_automated_recovery_from_failed_nodes_IPI_reactive(
            self, nodes, pvc_factory, pod_factory, failure, dc_pod_factory,
            interface):
        """
        Knip-678 Automated recovery from failed nodes
        Reactive case - IPI
        """
        # Get OSD running nodes
        osd_running_nodes = get_osd_running_nodes()
        log.info(f"OSDs are running on nodes {osd_running_nodes}")
        # Label osd nodes with fedora app
        label_worker_node(osd_running_nodes,
                          label_key='dc',
                          label_value='fedora')

        # Create DC app pods
        log.info("Creating DC based app pods")
        if interface == 'rbd':
            interface = constants.CEPHBLOCKPOOL
        elif interface == 'cephfs':
            interface = constants.CEPHFILESYSTEM
        dc_pod_obj = []
        for i in range(2):
            dc_pod = dc_pod_factory(interface=interface,
                                    node_selector={'dc': 'fedora'})
            self.threads.append(pod.run_io_in_bg(dc_pod, fedora_dc=True))
            dc_pod_obj.append(dc_pod)

        # Get app pods running nodes
        dc_pod_node_name = get_app_pod_running_nodes(dc_pod_obj)
        log.info(f"DC app pod running nodes are {dc_pod_node_name}")

        # Get both osd and app pod running node
        common_nodes = get_both_osd_and_app_pod_running_node(
            osd_running_nodes, dc_pod_node_name)
        log.info(f"Both OSD and app pod is running on nodes {common_nodes}")

        # Get the machine name using the node name
        machine_name = machine.get_machine_from_node_name(common_nodes[0])
        log.info(f"{common_nodes[0]} associated machine is {machine_name}")

        # Get the machineset name using machine name
        machineset_name = machine.get_machineset_from_machine_name(
            machine_name)
        log.info(
            f"{common_nodes[0]} associated machineset is {machineset_name}")

        # Add a new node and label it
        add_new_node_and_label_it(machineset_name)
        # Get the failure node obj
        failure_node_obj = get_node_objs(node_names=[common_nodes[0]])

        # Induce failure on the selected failure node
        log.info(f"Inducing failure on node {failure_node_obj[0].name}")
        if failure == "shutdown":
            nodes.stop_nodes(failure_node_obj, wait=True)
            log.info(f"Successfully powered off node: "
                     f"{failure_node_obj[0].name}")
        elif failure == "terminate":
            nodes.terminate_nodes(failure_node_obj, wait=True)
            log.info(f"Successfully terminated node : "
                     f"{failure_node_obj[0].name} instance")

        try:
            # DC app pods on the failed node will get automatically created on other
            # running node. Waiting for all dc app pod to reach running state
            pod.wait_for_dc_app_pods_to_reach_running_state(dc_pod_obj,
                                                            timeout=720)
            log.info("All the dc pods reached running state")
            pod.wait_for_storage_pods()

        except ResourceWrongStatusException:
            if failure == "shutdown":
                nodes.terminate_nodes(failure_node_obj, wait=True)
                log.info(f"Successfully terminated node : "
                         f"{failure_node_obj[0].name} instance")
            raise

        # Check basic cluster functionality by creating resources
        # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
        # run IO and delete the resources
        self.sanity_helpers.create_resources(pvc_factory, pod_factory)
        self.sanity_helpers.delete_resources()

        # Perform cluster and Ceph health checks
        self.sanity_helpers.health_check()
Пример #4
0
def label_nodes(request, with_ocs):
    """
    Fixture to label the node(s) that will run the application pod.
    That will be all workers node that do not run the OCS cluster.
    """

    m_set = ''  # this will hold machine_set name that added

    def teardown():

        ceph_health_check()

        if with_ocs:
            return

        if m_set != '':
            log.info(f'Destroy {m_set}')
            machine.delete_custom_machineset(m_set)
        else:
            log.info('Clear label form worker (Application) nodes')
            # Getting all Application nodes
            app_nodes = machine.get_labeled_nodes(constants.APP_NODE_LABEL)
            log.debug(f'The application nodes are : {app_nodes}')
            helpers.remove_label_from_worker_node(app_nodes,
                                                  constants.VDBENCH_NODE_LABEL)

    request.addfinalizer(teardown)

    if with_ocs:
        return

    # Add label to the worker nodes

    # Getting all OCS nodes (to verify app pod wil not run on)
    ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    worker_nodes = helpers.get_worker_nodes()
    # Getting list of free nodes
    free_nodes = list(set(worker_nodes) - set(ocs_nodes))

    if not free_nodes:
        # No free nodes -  Creating new machineset for application pods
        log.info('Adding new machineset, with worker for application pod')
        m_set = machine.create_custom_machineset(
            label=constants.APP_NODE_LABEL)
        machine.wait_for_new_node_to_be_ready(m_set)

        free_nodes = machine.get_labeled_nodes(
            f'node-role.kubernetes.io/app={constants.APP_NODE_LABEL}')

        # TODO: implement this for VMWare as well.

    log.info('Adding the app-node label to Non-OCS workers')
    log.debug(f'The Workers nodes are : {worker_nodes}')
    log.debug(f'The OCS nodes are : {ocs_nodes}')
    log.debug(f'The free nodes are : {free_nodes}')

    assert free_nodes, \
        'Did not found any worker to run on, pleas deploy another worker'

    helpers.label_worker_node(free_nodes, constants.APP_NODE_LABEL,
                              constants.VDBENCH_NODE_LABEL)

    return
Пример #5
0
def check_and_add_enough_worker(worker_count):
    """
    Function to check if there is enough workers available to scale pods.
    IF there is no enough worker then worker will be added based on supported platforms
    Function also adds scale label to the respective worker nodes.

    Args:
        worker_count (int): Expected worker count to be present in the setup

    Returns:
        book: True is there is enough worker count else raise exception.

    """
    # Check either to use OCS workers for scaling app pods
    # Further continue to label the worker with scale label else not
    worker_list = helpers.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get('use_ocs_worker_for_scale'):
        if not scale_worker:
            helpers.label_worker_node(
                node_list=worker_list, label_key='scale-label', label_value='app-scale'
            )
    else:
        if not scale_worker:
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list, label_key='scale-label', label_value='app-scale'
                )
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    # Check if there is enough nodes to continue scaling of app pods
    if len(scale_worker_list) >= worker_count:
        logging.info(
            f"Setup has expected worker count {worker_count} "
            "to continue scale of pods"
        )
        return True
    else:
        logging.info(
            "There is no enough worker in the setup, will add enough worker "
            "for the automation supported platforms"
        )
        # Add enough worker for AWS
        if config.ENV_DATA['deployment_type'] == 'ipi' and config.ENV_DATA['platform'].lower() == 'aws':
            # Create machineset for app worker nodes on each aws zone
            # Each zone will have one app worker node
            ms_name = list()
            for obj in machine.get_machineset_objs():
                if 'app' in obj.name:
                    ms_name.append(obj.name)
            if not ms_name:
                if len(machine.get_machineset_objs()) == 3:
                    for zone in ['a', 'b', 'c']:
                        ms_name.append(
                            machine.create_custom_machineset(instance_type='m5.4xlarge', zone=zone)
                        )
                else:
                    ms_name.append(
                        machine.create_custom_machineset(instance_type='m5.4xlarge', zone='a')
                    )
                for ms in ms_name:
                    machine.wait_for_new_node_to_be_ready(ms)
            if len(ms_name) == 3:
                exp_count = int(worker_count / 3)
            else:
                exp_count = worker_count
            for name in ms_name:
                machine.add_node(machine_set=name, count=exp_count)
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)
            worker_list = helpers.get_worker_nodes()
            ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
            scale_label_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
            ocs_worker_list.extend(scale_label_worker)
            final_list = list(dict.fromkeys(ocs_worker_list))
            for node_item in final_list:
                if node_item in worker_list:
                    worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list, label_key='scale-label', label_value='app-scale'
                )
            return True
        elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA['platform'].lower() == 'vsphere':
            raise UnsupportedPlatformError("Unsupported Platform to add worker")
        elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA['platform'].lower() == 'baremetal':
            raise UnsupportedPlatformError("Unsupported Platform to add worker")
        elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA['platform'].lower() == 'azure':
            raise UnsupportedPlatformError("Unsupported Platform to add worker")
        else:
            raise UnavailableResourceException(
                "There is no enough worker nodes to continue app pod scaling"
            )
    def test_automated_recovery_from_failed_nodes_IPI_proactive(
            self, interface, pvc_factory, pod_factory, dc_pod_factory):
        """
        Knip-678 Automated recovery from failed nodes
        Proactive case - IPI
        """
        # Get OSD running nodes
        osd_running_nodes = get_osd_running_nodes()
        log.info(f"OSDs are running on nodes {osd_running_nodes}")
        # Label osd nodes with fedora app
        label_worker_node(osd_running_nodes,
                          label_key='dc',
                          label_value='fedora')

        # Create DC app pods
        log.info("Creating DC based app pods")
        interface = (constants.CEPHBLOCKPOOL
                     if interface == 'rbd' else constants.CEPHFILESYSTEM)
        dc_pod_obj = []
        for i in range(2):
            dc_pod = dc_pod_factory(interface=interface,
                                    node_selector={'dc': 'fedora'})
            pod.run_io_in_bg(dc_pod, fedora_dc=True)
            dc_pod_obj.append(dc_pod)

        # Get app pods running nodes
        dc_pod_node_name = get_app_pod_running_nodes(dc_pod_obj)
        log.info(f"DC app pod running nodes are {dc_pod_node_name}")

        # Get both osd and app pod running node
        common_nodes = get_both_osd_and_app_pod_running_node(
            osd_running_nodes, dc_pod_node_name)
        msg = "Common OSD and app running node(s) NOT found"
        assert (len(common_nodes) > 0), msg
        log.info(f"Common OSD and app pod running nodes are {common_nodes}")

        # Get the machine name using the node name
        machine_name = machine.get_machine_from_node_name(common_nodes[0])
        log.info(f"{common_nodes[0]} associated machine is {machine_name}")

        # Get the machineset name using machine name
        machineset_name = machine.get_machineset_from_machine_name(
            machine_name)
        log.info(
            f"{common_nodes[0]} associated machineset is {machineset_name}")

        # Add a new node and label it
        add_new_node_and_label_it(machineset_name)

        # Delete the machine
        machine.delete_machine(machine_name)
        log.info(f"Successfully deleted machine {machine_name}")

        # DC app pods on the failed node will get automatically created on
        # other running node. Waiting for all dc app pod to reach running
        # state
        pod.wait_for_dc_app_pods_to_reach_running_state(dc_pod_obj)
        log.info("All the dc pods reached running state")

        # Check all OCS pods status, they should be in running state
        all_pod_obj = pod.get_all_pods(
            namespace=defaults.ROOK_CLUSTER_NAMESPACE)
        for pod_obj in all_pod_obj:
            if ('-1-deploy' or 'ocs-deviceset') not in pod_obj.name:
                try:
                    helpers.wait_for_resource_state(
                        resource=pod_obj,
                        state=constants.STATUS_RUNNING,
                        timeout=200)
                except ResourceWrongStatusException:
                    # 'rook-ceph-crashcollector' on the failed node stucks at
                    # pending state. BZ 1810014 tracks it.
                    # Ignoring 'rook-ceph-crashcollector' pod health check as
                    # WA and deleting its deployment so that the pod
                    # disappears. Will revert this WA once the BZ is fixed
                    if 'rook-ceph-crashcollector' in pod_obj.name:
                        ocp_obj = ocp.OCP(
                            namespace=defaults.ROOK_CLUSTER_NAMESPACE)
                        pod_name = pod_obj.name
                        deployment_name = '-'.join(pod_name.split("-")[:-2])
                        command = f"delete deployment {deployment_name}"
                        ocp_obj.exec_oc_cmd(command=command)
                        log.info(f"Deleted deployment for pod {pod_obj.name}")
                    else:
                        raise

        # Check basic cluster functionality by creating resources
        # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
        # run IO and delete the resources
        self.sanity_helpers.create_resources(pvc_factory, pod_factory)
        self.sanity_helpers.delete_resources()

        # Perform cluster and Ceph health checks
        self.sanity_helpers.health_check()
Пример #7
0
    def test_simultaneous_drain_of_two_ocs_nodes(self, pvc_factory,
                                                 pod_factory, dc_pod_factory,
                                                 interface):
        """
        OCS-2128/OCS-2129:
        - Create PVCs and start IO on DC based app pods
        - Add one extra node in two of the AZs and label the nodes
          with OCS storage label
        - Maintenance (mark as unscheduable and drain) 2 worker nodes
          simultaneously
        - Confirm that OCS and DC pods are in running state
        - Remove unscheduled nodes
        - Check cluster functionality by creating resources
          (pools, storageclasses, PVCs, pods - both CephFS and RBD)
        - Check cluster and Ceph health

        """
        # Get OSD running nodes
        osd_running_worker_nodes = get_osd_running_nodes()
        log.info(f"OSDs are running on nodes {osd_running_worker_nodes}")

        # Label osd nodes with fedora app
        label_worker_node(osd_running_worker_nodes,
                          label_key='dc',
                          label_value='fedora')
        log.info("Successfully labeled worker nodes with {dc:fedora}")

        # Create DC app pods
        log.info("Creating DC based app pods and starting IO in background")
        interface = (constants.CEPHBLOCKPOOL
                     if interface == 'rbd' else constants.CEPHFILESYSTEM)
        dc_pod_obj = []
        for i in range(2):
            dc_pod = dc_pod_factory(interface=interface,
                                    node_selector={'dc': 'fedora'})
            pod.run_io_in_bg(dc_pod, fedora_dc=True)
            dc_pod_obj.append(dc_pod)

        # Get the machine name using the node name
        machine_names = [
            machine.get_machine_from_node_name(osd_running_worker_node)
            for osd_running_worker_node in osd_running_worker_nodes[:2]
        ]
        log.info(f"{osd_running_worker_nodes} associated "
                 f"machine are {machine_names}")

        # Get the machineset name using machine name
        machineset_names = [
            machine.get_machineset_from_machine_name(machine_name)
            for machine_name in machine_names
        ]
        log.info(f"{osd_running_worker_nodes} associated machineset "
                 f"is {machineset_names}")

        # Add a new node and label it
        add_new_node_and_label_it(machineset_names[0])
        add_new_node_and_label_it(machineset_names[1])

        # Drain 2 nodes
        drain_nodes(osd_running_worker_nodes[:2])

        # Check the pods should be in running state
        all_pod_obj = pod.get_all_pods(wait=True)
        for pod_obj in all_pod_obj:
            if ('-1-deploy' or 'ocs-deviceset') not in pod_obj.name:
                try:
                    helpers.wait_for_resource_state(
                        resource=pod_obj,
                        state=constants.STATUS_RUNNING,
                        timeout=200)
                except ResourceWrongStatusException:
                    # 'rook-ceph-crashcollector' on the failed node stucks at
                    # pending state. BZ 1810014 tracks it.
                    # Ignoring 'rook-ceph-crashcollector' pod health check as
                    # WA and deleting its deployment so that the pod
                    # disappears. Will revert this WA once the BZ is fixed
                    if 'rook-ceph-crashcollector' in pod_obj.name:
                        ocp_obj = ocp.OCP(
                            namespace=defaults.ROOK_CLUSTER_NAMESPACE)
                        pod_name = pod_obj.name
                        deployment_name = '-'.join(pod_name.split("-")[:-2])
                        command = f"delete deployment {deployment_name}"
                        ocp_obj.exec_oc_cmd(command=command)
                        log.info(f"Deleted deployment for pod {pod_obj.name}")

        # DC app pods on the drained node will get automatically created on other
        # running node in same AZ. Waiting for all dc app pod to reach running state
        pod.wait_for_dc_app_pods_to_reach_running_state(dc_pod_obj)
        log.info("All the dc pods reached running state")

        # Remove unscheduled nodes
        # In scenarios where the drain is attempted on >3 worker setup,
        # post completion of drain we are removing the unscheduled nodes so
        # that we maintain 3 worker nodes.
        log.info(f"Removing scheduled nodes {osd_running_worker_nodes[:2]}")
        remove_node_objs = get_node_objs(osd_running_worker_nodes[:2])
        remove_nodes(remove_node_objs)

        # Check basic cluster functionality by creating resources
        # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
        # run IO and delete the resources
        self.sanity_helpers.create_resources(pvc_factory, pod_factory)
        self.sanity_helpers.delete_resources()

        # Perform cluster and Ceph health checks
        self.sanity_helpers.health_check()
    def test_rwo_pvc_fencing_node_prolonged_and_short_network_failure(
        self, nodes, setup, teardown
    ):
        """
        OCS-1431/OCS-1436:
        - Start DeploymentConfig based app pods on 1 node
        - Make the node (where app pods are running) unresponsive
            by bringing its main network interface down
        - Disrupt the leader provisioner pods if not running on above selected
            node
        - Check new app pods and/or mon, osd pods scheduled on another node
            are stuck due to Multi-Attach error.
        - Power off the unresponsive node
        - Force delete the app pods and/or mon,osd pods on the unresponsive node
        - Check new app pods and/or mon, osd pods scheduled on another node comes
            into Running state
        - Run IOs on new app pods
        - Again make the node (where app pods are running) unresponsive
            by bringing its main network interface down
        - Check new app pods scheduled on another node are stuck due to
            Multi-Attach error.
        - Reboot the unresponsive node
        - When unresponsive node recovers, run IOs on new app pods
        """
        ceph_cluster, dc_pods, ceph_pods, app_pod_nodes, test_nodes, disruptor = setup

        extra_nodes = list(set(test_nodes) - set(app_pod_nodes))
        helpers.remove_label_from_worker_node(
            node_list=extra_nodes[:-1], label_key="nodetype"
        )

        # Run IO on pods
        md5sum_data = self.run_and_verify_io(
            pod_list=dc_pods, fio_filename='io_file1', run_io_in_bg=True
        )

        # Disrupt leader plugin-provisioner pods, skip if running on node to be failed
        if disruptor:
            [disruption.delete_resource() for disruption in disruptor]

        # Induce network failure on the nodes
        node.node_network_failure(app_pod_nodes)
        logger.info(f"Waiting for {self.prolong_nw_fail_time} seconds")
        sleep(self.prolong_nw_fail_time)

        # Wait for pods to be rescheduled
        for pod_obj in (dc_pods + ceph_pods):
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_TERMINATING,
                resource_name=pod_obj.name
            )

        # Fetch info of new pods and verify Multi-Attach error
        new_dc_pods = self.get_new_pods(dc_pods)
        assert len(new_dc_pods) == len(dc_pods), 'Unexpected number of app pods'
        self.verify_multi_attach_error(new_dc_pods)

        new_ceph_pods = []
        if ceph_pods:
            new_ceph_pods = self.get_new_pods(ceph_pods)
            assert len(new_ceph_pods) > 0, 'Unexpected number of osd pods'
            self.verify_multi_attach_error(new_ceph_pods)

        logger.info(f"Executing manual recovery steps")
        # Power off the unresponsive node
        logger.info(
            f"Powering off the unresponsive node: {app_pod_nodes}"
        )
        nodes.stop_nodes(node.get_node_objs(app_pod_nodes))

        # Force delete the app pods and/or mon,osd pods on the unresponsive node
        for pod_obj in (dc_pods + ceph_pods):
            pod_obj.delete(force=True)

        # Wait for new app pods to reach Running state
        for pod_obj in new_dc_pods:
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_RUNNING, resource_name=pod_obj.name,
                timeout=1200, sleep=30
            ), (
                f"App pod with name {pod_obj.name} did not reach Running state"
            )

        # Wait for mon and osd pods to reach Running state
        selectors_to_check = [constants.MON_APP_LABEL, constants.OSD_APP_LABEL]
        for selector in selectors_to_check:
            assert ceph_cluster.POD.wait_for_resource(
                condition=constants.STATUS_RUNNING, selector=selector,
                resource_count=3, timeout=1800, sleep=60
            ), (
                f"3 expected pods with selector {selector} are not in Running state"
            )

        if ceph_cluster.mon_count == 3:
            # Check ceph health
            toolbox_status = ceph_cluster.POD.get_resource_status(
                ceph_cluster.toolbox.name
            )
            if toolbox_status == constants.STATUS_TERMINATING:
                ceph_cluster.toolbox.delete(force=True)

            assert ceph_health_check(), f"Ceph cluster health is not OK"
            logger.info("Ceph cluster health is OK")

        # Verify data integrity from new pods
        for num, pod_obj in enumerate(new_dc_pods):
            assert pod.verify_data_integrity(
                pod_obj=pod_obj, file_name='io_file1',
                original_md5sum=md5sum_data[num]
            ), 'Data integrity check failed'

        # Run IO on new pods
        md5sum_data2 = self.run_and_verify_io(
            pod_list=new_dc_pods, fio_filename='io_file2', run_io_in_bg=True
        )

        helpers.label_worker_node(
            node_list=extra_nodes[:-1], label_key="nodetype", label_value="app-pod"
        )

        # Induce network failure on the node
        node.node_network_failure(extra_nodes[-1])
        logger.info(f"Waiting for {self.short_nw_fail_time} seconds")
        sleep(self.short_nw_fail_time)

        # Wait for pods to be rescheduled
        for pod_obj in new_dc_pods:
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_TERMINATING,
                resource_name=pod_obj.name, timeout=600, sleep=30
            )

        # Fetch info of new pods and verify Multi-Attach error
        new_dc_pods2 = self.get_new_pods(new_dc_pods)
        assert len(new_dc_pods2) == len(new_dc_pods), 'Unexpected number of app pods'
        self.verify_multi_attach_error(new_dc_pods2)

        # Reboot the unresponsive node
        logger.info(f"Rebooting the unresponsive node: {extra_nodes[-1]}")
        nodes.restart_nodes(node.get_node_objs([extra_nodes[-1]]))
        node.wait_for_nodes_status(
            node_names=[extra_nodes[-1]], status=constants.NODE_READY
        )

        # Wait for new app pods to reach Running state
        for pod_obj in new_dc_pods2:
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_RUNNING, resource_name=pod_obj.name,
                timeout=1200, sleep=30
            ), (
                f"App pod with name {pod_obj.name} did not reach Running state"
            )

        # Wait for mon and osd pods to reach Running state
        for selector in selectors_to_check:
            assert ceph_cluster.POD.wait_for_resource(
                condition=constants.STATUS_RUNNING, selector=selector,
                resource_count=3, timeout=1800, sleep=60
            ), (
                f"3 expected pods with selector {selector} are not in Running state"
            )

        if ceph_cluster.mon_count == 3:
            # Check ceph health
            assert ceph_health_check(), f"Ceph cluster health is not OK"
            logger.info("Ceph cluster health is OK")

        # Verify data integrity from new pods
        for num, pod_obj in enumerate(new_dc_pods2):
            assert pod.verify_data_integrity(
                pod_obj=pod_obj, file_name='io_file2',
                original_md5sum=md5sum_data2[num]
            ), 'Data integrity check for files written before second node failures failed'

        for num, pod_obj in enumerate(new_dc_pods2):
            assert pod.verify_data_integrity(
                pod_obj=pod_obj, file_name='io_file1',
                original_md5sum=md5sum_data[num]
            ), 'Data integrity check for files written before first node failures failed'

        # Run IO on new pods
        self.run_and_verify_io(
            pod_list=new_dc_pods2, fio_filename='io_file3', return_md5sum=False
        )
    def setup(
        self, request, scenario, num_of_nodes, num_of_fail_nodes,
        disrupt_provisioner, project_factory, multi_pvc_factory, dc_pod_factory
    ):
        """
        Identify the nodes and start DeploymentConfig based app pods using
        PVC with ReadWriteOnce (RWO) access mode on selected nodes

        Args:
            scenario (str): Scenario of app pods running on OCS or dedicated nodes
                (eg., 'colocated', 'dedicated')
            num_of_nodes (int): number of nodes required for running test
            num_of_fail_nodes (int): number of nodes to make unresponsive during test
            disrupt_provisioner (bool): True to disrupt the leader provisioner
                pods if not running on selected nodes, else False
            project_factory: A fixture to create new project
            multi_pvc_factory: A fixture create a set of new PVCs
            dc_pod_factory: A fixture to create deploymentconfig pods

        Returns:
            tuple: containing the params used in test cases
        """
        ocs_nodes, non_ocs_nodes = self.identify_and_add_nodes(
            scenario, num_of_nodes
        )
        test_nodes = ocs_nodes if (scenario == "colocated") else non_ocs_nodes
        logger.info(f"Using nodes {test_nodes} for running test")

        def finalizer():
            helpers.remove_label_from_worker_node(
                node_list=test_nodes, label_key="nodetype"
            )

        request.addfinalizer(finalizer)

        if len(ocs_nodes) > 4 and float(config.ENV_DATA['ocs_version']) >= 4.3:
            pod_obj = ocp.OCP(
                kind=constants.POD, namespace=config.ENV_DATA['cluster_namespace']
            )
            assert pod_obj.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                selector=constants.MON_APP_LABEL, resource_count=5, timeout=900
            )

        ceph_cluster = CephCluster()
        project = project_factory()

        # Select nodes for running app pods and inducing network failure later
        app_pod_nodes = self.select_nodes_for_app_pods(
            scenario, ceph_cluster, ocs_nodes, non_ocs_nodes,
            num_of_fail_nodes
        )

        # Create multiple RBD and CephFS backed PVCs with RWO accessmode
        num_of_pvcs = self.num_of_app_pods_per_node * num_of_fail_nodes
        rbd_pvcs = multi_pvc_factory(
            interface=constants.CEPHBLOCKPOOL, project=project, size=self.pvc_size,
            access_modes=[constants.ACCESS_MODE_RWO], num_of_pvc=num_of_pvcs
        )
        cephfs_pvcs = multi_pvc_factory(
            interface=constants.CEPHFILESYSTEM, project=project, size=self.pvc_size,
            access_modes=[constants.ACCESS_MODE_RWO], num_of_pvc=num_of_pvcs
        )

        # Create deploymentconfig based pods
        dc_pods = []
        # Start app-pods on selected node(s)
        for node_name in app_pod_nodes:
            logger.info(f"Starting app pods on the node {node_name}")
            helpers.label_worker_node(
                node_list=[node_name], label_key="nodetype",
                label_value="app-pod"
            )

            for num in range(self.num_of_app_pods_per_node):
                dc_pods.append(
                    dc_pod_factory(
                        interface=constants.CEPHBLOCKPOOL, pvc=rbd_pvcs.pop(0),
                        node_selector={'nodetype': 'app-pod'}
                    )
                )
                assert pod.verify_node_name(dc_pods[-1], node_name), (
                    f"Pod {dc_pods[-1].name} is not running on labeled node {node_name}"
                )
                dc_pods.append(
                    dc_pod_factory(
                        interface=constants.CEPHFILESYSTEM, pvc=cephfs_pvcs.pop(0),
                        node_selector={'nodetype': 'app-pod'}
                    )
                )
                assert pod.verify_node_name(dc_pods[-1], node_name), (
                    f"Pod {dc_pods[-1].name} is not running on labeled node {node_name}"
                )
            helpers.remove_label_from_worker_node(
                node_list=[node_name], label_key="nodetype"
            )

        # Label other test nodes to be able to run app pods later
        helpers.label_worker_node(
            node_list=test_nodes, label_key="nodetype", label_value="app-pod"
        )

        # Get ceph mon,osd pods running on selected node if colocated scenario
        # and extra OCS nodes are present
        ceph_pods = []
        if scenario == "colocated" and len(test_nodes) > len(ceph_cluster.osds):
            pods_to_check = ceph_cluster.osds
            # Skip mon pods if mon_count is 5 as there may not be enough nodes
            # for all mons to run after multiple node failures
            if ceph_cluster.mon_count == 3:
                pods_to_check.extend(ceph_cluster.mons)
            for pod_obj in pods_to_check:
                if pod.get_pod_node(pod_obj).name in app_pod_nodes[0]:
                    ceph_pods.append(pod_obj)
            logger.info(
                f"Colocated Mon, OSD pods: {[pod_obj.name for pod_obj in ceph_pods]}"
            )

        disruptor = []
        if disrupt_provisioner:
            disruptor = self.disrupt_plugin_provisioner_pods(app_pod_nodes)

        return ceph_cluster, dc_pods, ceph_pods, app_pod_nodes, test_nodes, disruptor
Пример #10
0
    def test_node_replacement_reactive_aws_ipi(
        self, nodes, pvc_factory, pod_factory, dc_pod_factory,
        failure, interface
    ):
        """
        Knip-894 Node replacement - AWS-IPI-Reactive

        """
        # Get worker nodes
        initial_nodes = get_worker_nodes()

        # Get OSD running nodes
        osd_running_nodes = get_osd_running_nodes()
        log.info(f"OSDs are running on nodes {osd_running_nodes}")

        # Label osd nodes with fedora app
        label_worker_node(osd_running_nodes, label_key='dc', label_value='fedora')

        # Create DC app pods
        log.info("Creating DC based app pods")
        if interface == 'rbd':
            interface = constants.CEPHBLOCKPOOL
        elif interface == 'cephfs':
            interface = constants.CEPHFILESYSTEM
        dc_pod_obj = []
        for i in range(2):
            dc_pod = dc_pod_factory(
                interface=interface, node_selector={'dc': 'fedora'})
            pod.run_io_in_bg(dc_pod, fedora_dc=True)
            dc_pod_obj.append(dc_pod)

        # Get app pods running nodes
        dc_pod_node_name = get_app_pod_running_nodes(dc_pod_obj)
        log.info(f"DC app pod running nodes are {dc_pod_node_name}")

        # Get both osd and app pod running node
        common_nodes = get_both_osd_and_app_pod_running_node(
            osd_running_nodes, dc_pod_node_name
        )
        log.info(f"Both OSD and app pod is running on nodes {common_nodes}")

        # Get the machine name using the node name
        machine_name = machine.get_machine_from_node_name(common_nodes[0])
        log.info(f"{common_nodes[0]} associated machine is {machine_name}")

        # Get the machineset name using machine name
        machineset_name = machine.get_machineset_from_machine_name(
            machine_name
        )
        log.info(
            f"{common_nodes[0]} associated machineset is {machineset_name}"
        )

        # Get the failure node obj
        failure_node_obj = get_node_objs(node_names=[common_nodes[0]])

        # Induce failure on the selected failure node
        log.info(f"Inducing failure on node {failure_node_obj[0].name}")
        if failure == "power off":
            # Power off AWS worker node instance
            nodes.stop_nodes(failure_node_obj, wait=True)
            log.info(f"Successfully powered off node: {failure_node_obj[0].name}")
        elif failure == "network failure":
            # Induce Network failure
            node_network_failure([failure_node_obj[0].name])

        # Add annotation to the failed node
        annotation = "machine.openshift.io/exclude-node-draining=''"
        machine.add_annotation_to_machine(
            annotation=annotation, machine_name=machine_name
        )

        # Delete the machine
        machine.delete_machine(machine_name)
        log.info(f"Successfully deleted machine {machine_name}")

        # Wait for the new machine to spin
        log.info("Waiting for the new node to be in ready state")
        machine.wait_for_new_node_to_be_ready(machineset_name)

        # Get the node name of new spun node
        nodes_after_new_spun_node = get_worker_nodes()
        new_spun_node = list(
            set(nodes_after_new_spun_node) - set(initial_nodes)
        )
        log.info(f"New spun node is {new_spun_node}")

        # Label it
        node_obj = ocp.OCP(kind='node')
        node_obj.add_label(
            resource_name=new_spun_node[0],
            label=constants.OPERATOR_NODE_LABEL
        )
        log.info(
            f"Successfully labeled {new_spun_node} with OCS storage label"
        )

        # DC app pods on the failed node will get automatically created on other
        # running node. Waiting for all dc app pod to reach running state
        pod.wait_for_dc_app_pods_to_reach_running_state(
            dc_pod_obj, timeout=1200
        )
        log.info("All the dc pods reached running state")

        # Check all OCS pods status, they should be in running state
        all_pod_obj = pod.get_all_pods(
            namespace=defaults.ROOK_CLUSTER_NAMESPACE
        )
        for pod_obj in all_pod_obj:
            if '-1-deploy' and 'ocs-deviceset' not in pod_obj.name:
                try:
                    helpers.wait_for_resource_state(
                        resource=pod_obj, state=constants.STATUS_RUNNING,
                        timeout=1800
                    )
                except ResourceWrongStatusException:
                    # 'rook-ceph-crashcollector' on the failed node stucks at
                    # pending state. BZ 1810014 tracks it.
                    # Ignoring 'rook-ceph-crashcollector' pod health check as
                    # WA and deleting its deployment so that the pod
                    # disappears. Will revert this WA once the BZ is fixed
                    if 'rook-ceph-crashcollector' in pod_obj.name:
                        ocp_obj = ocp.OCP(
                            namespace=defaults.ROOK_CLUSTER_NAMESPACE
                        )
                        pod_name = pod_obj.name
                        deployment_name = '-'.join(pod_name.split("-")[:-2])
                        command = f"delete deployment {deployment_name}"
                        ocp_obj.exec_oc_cmd(command=command)
                        log.info(f"Deleted deployment for pod {pod_obj.name}")

        # Check basic cluster functionality by creating resources
        # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
        # run IO and delete the resources
        self.sanity_helpers.create_resources(pvc_factory, pod_factory)
        self.sanity_helpers.delete_resources()

        # Perform cluster and Ceph health checks
        self.sanity_helpers.health_check()
Пример #11
0
    def test_node_replacement_reactive_aws_ipi(self, nodes, pvc_factory,
                                               pod_factory, dc_pod_factory,
                                               failure, interface):
        """
        Knip-894 Node replacement - AWS-IPI-Reactive

        """
        # Get worker nodes
        initial_nodes = get_worker_nodes()

        # Get OSD running nodes
        osd_running_nodes = get_osd_running_nodes()
        log.info(f"OSDs are running on nodes {osd_running_nodes}")

        # Label osd nodes with fedora app
        label_worker_node(osd_running_nodes,
                          label_key='dc',
                          label_value='fedora')

        # Create DC app pods
        log.info("Creating DC based app pods")
        if interface == 'rbd':
            interface = constants.CEPHBLOCKPOOL
        elif interface == 'cephfs':
            interface = constants.CEPHFILESYSTEM
        dc_pod_obj = []
        for i in range(2):
            dc_pod = dc_pod_factory(interface=interface,
                                    node_selector={'dc': 'fedora'})
            pod.run_io_in_bg(dc_pod, fedora_dc=True)
            dc_pod_obj.append(dc_pod)

        # Get app pods running nodes
        dc_pod_node_name = get_app_pod_running_nodes(dc_pod_obj)
        log.info(f"DC app pod running nodes are {dc_pod_node_name}")

        # Get both osd and app pod running node
        common_nodes = get_both_osd_and_app_pod_running_node(
            osd_running_nodes, dc_pod_node_name)
        log.info(f"Both OSD and app pod is running on nodes {common_nodes}")

        # Get the machine name using the node name
        machine_name = machine.get_machine_from_node_name(common_nodes[0])
        log.info(f"{common_nodes[0]} associated machine is {machine_name}")

        # Get the machineset name using machine name
        machineset_name = machine.get_machineset_from_machine_name(
            machine_name)
        log.info(
            f"{common_nodes[0]} associated machineset is {machineset_name}")

        # Get the failure node obj
        failure_node_obj = get_node_objs(node_names=[common_nodes[0]])

        # Induce failure on the selected failure node
        log.info(f"Inducing failure on node {failure_node_obj[0].name}")
        if failure == "power off":
            # Power off AWS worker node instance
            nodes.stop_nodes(failure_node_obj, wait=True)
            log.info(
                f"Successfully powered off node: {failure_node_obj[0].name}")
        elif failure == "network failure":
            # Induce Network failure
            node_network_failure([failure_node_obj[0].name])

        # Add annotation to the failed node
        annotation = "machine.openshift.io/exclude-node-draining=''"
        machine.add_annotation_to_machine(annotation=annotation,
                                          machine_name=machine_name)

        # Delete the machine
        machine.delete_machine(machine_name)
        log.info(f"Successfully deleted machine {machine_name}")

        # Wait for the new machine to spin
        log.info("Waiting for the new node to be in ready state")
        machine.wait_for_new_node_to_be_ready(machineset_name)

        # Get the node name of new spun node
        nodes_after_new_spun_node = get_worker_nodes()
        new_spun_node = list(
            set(nodes_after_new_spun_node) - set(initial_nodes))
        log.info(f"New spun node is {new_spun_node}")

        # Label it
        node_obj = ocp.OCP(kind='node')
        node_obj.add_label(resource_name=new_spun_node[0],
                           label=constants.OPERATOR_NODE_LABEL)
        log.info(
            f"Successfully labeled {new_spun_node} with OCS storage label")

        # DC app pods on the failed node will get automatically created on other
        # running node. Waiting for all dc app pod to reach running state
        pod.wait_for_dc_app_pods_to_reach_running_state(dc_pod_obj,
                                                        timeout=1200)
        log.info("All the dc pods reached running state")

        nodes_helpers.wait_for_all_pods()

        # Check basic cluster functionality by creating resources
        # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
        # run IO and delete the resources
        self.sanity_helpers.create_resources(pvc_factory, pod_factory)
        self.sanity_helpers.delete_resources()

        # Perform cluster and Ceph health checks
        self.sanity_helpers.health_check()
Пример #12
0
    def setup(self, request, scenario, nodes, multi_pvc_factory,
              service_account_factory, dc_pod_factory):
        """
        Identify the nodes and start multiple dc pods for the test

        Args:
            scenario (str): Scenario of app pods running on OCS or dedicated nodes
                (eg., 'colocated', 'dedicated')
            nodes: A fixture to get instance of the relevant platform nodes class
            multi_pvc_factory: A fixture create a set of new PVCs
            service_account_factory: A fixture to create a service account
            dc_pod_factory: A fixture to create dc pod

        Returns:
            list: dc pod objs

        """
        worker_nodes = helpers.get_worker_nodes()
        ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
        non_ocs_nodes = list(set(worker_nodes) - set(ocs_nodes))

        def finalizer():
            helpers.remove_label_from_worker_node(node_list=worker_nodes,
                                                  label_key="nodetype")

            # Check ceph health
            ceph_health_check(tries=80)

        request.addfinalizer(finalizer)

        if (scenario == 'dedicated') and len(non_ocs_nodes) == 0:
            if config.ENV_DATA.get('deployment_type').lower() == 'ipi':
                machines = machine.get_machinesets()
                node.add_new_node_and_label_it(machines[0],
                                               num_nodes=1,
                                               mark_for_ocs_label=False)
            else:
                if config.ENV_DATA.get(
                        'platform').lower() == constants.VSPHERE_PLATFORM:
                    pytest.skip(
                        "Skipping add node in VSPHERE due to https://bugzilla.redhat.com/show_bug.cgi?id=1844521"
                    )
                is_rhel = config.ENV_DATA.get(
                    'rhel_workers') or config.ENV_DATA.get('rhel_user')
                node_type = constants.RHEL_OS if is_rhel else constants.RHCOS
                node.add_new_node_and_label_upi(node_type=node_type,
                                                num_nodes=1,
                                                mark_for_ocs_label=False)
            non_ocs_nodes = list(
                set(helpers.get_worker_nodes()) - set(ocs_nodes))

        app_pod_nodes = ocs_nodes if (scenario
                                      == "colocated") else non_ocs_nodes

        # Label nodes to be able to run app pods
        helpers.label_worker_node(node_list=app_pod_nodes,
                                  label_key="nodetype",
                                  label_value="app-pod")

        access_modes_rbd = [
            constants.ACCESS_MODE_RWO, f'{constants.ACCESS_MODE_RWX}-Block'
        ]

        access_modes_cephfs = [
            constants.ACCESS_MODE_RWO, constants.ACCESS_MODE_RWX
        ]

        pvcs_rbd = multi_pvc_factory(interface=constants.CEPHBLOCKPOOL,
                                     size=self.pvc_size,
                                     access_modes=access_modes_rbd,
                                     status=constants.STATUS_BOUND,
                                     num_of_pvc=len(access_modes_rbd))

        project = pvcs_rbd[0].project

        pvcs_cephfs = multi_pvc_factory(interface=constants.CEPHFILESYSTEM,
                                        project=project,
                                        size=self.pvc_size,
                                        access_modes=access_modes_cephfs,
                                        status=constants.STATUS_BOUND,
                                        num_of_pvc=len(access_modes_cephfs))

        pvcs = pvcs_cephfs + pvcs_rbd
        # Set volume mode on PVC objects
        for pvc_obj in pvcs:
            pvc_info = pvc_obj.get()
            setattr(pvc_obj, 'volume_mode', pvc_info['spec']['volumeMode'])

        sa_obj = service_account_factory(project=project)
        pods = []

        # Create pods
        for pvc_obj in pvcs:
            if constants.CEPHFS_INTERFACE in pvc_obj.storageclass.name:
                interface = constants.CEPHFILESYSTEM
            else:
                interface = constants.CEPHBLOCKPOOL

            num_pods = 2 if pvc_obj.access_mode == constants.ACCESS_MODE_RWX else 1
            logger.info("Creating app pods")
            for _ in range(num_pods):
                pods.append(
                    dc_pod_factory(interface=interface,
                                   pvc=pvc_obj,
                                   node_selector={'nodetype': 'app-pod'},
                                   raw_block_pv=pvc_obj.volume_mode == 'Block',
                                   sa_obj=sa_obj))

        logger.info(
            f"Created {len(pods)} pods using {len(pvcs_cephfs)} cephfs, {len(pvcs_rbd)} rbd PVCs."
        )

        return pods
Пример #13
0
def add_worker_node(instance_type=None):
    global ms_name
    ms_name = list()
    worker_list = helpers.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get('use_ocs_worker_for_scale'):
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key='scale-label',
                                      label_value='app-scale')
    else:
        if not scale_worker:
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(node_list=worker_list,
                                          label_key='scale-label',
                                          label_value='app-scale')
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    if config.ENV_DATA['deployment_type'] == 'ipi' and config.ENV_DATA[
            'platform'].lower() == 'aws':
        log.info("Adding worker nodes on the current cluster")
        # Create machineset for app worker nodes on each zone
        for obj in machine.get_machineset_objs():
            if 'app' in obj.name:
                ms_name.append(obj.name)
        if instance_type is not None:
            instance_type = instance_type
        else:
            instance_type = 'm5.4xlarge'
        if not ms_name:
            if len(machine.get_machineset_objs()) == 3:
                for zone in ['a', 'b', 'c']:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type=instance_type, zone=zone))
            else:
                ms_name.append(
                    machine.create_custom_machineset(
                        instance_type=instance_type, zone='a'))
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)

        worker_list = helpers.get_worker_nodes()
        ocs_worker_list = machine.get_labeled_nodes(
            constants.OPERATOR_NODE_LABEL)
        scale_label_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
        ocs_worker_list.extend(scale_label_worker)
        final_list = list(dict.fromkeys(ocs_worker_list))
        for node_item in final_list:
            if node_item in worker_list:
                worker_list.remove(node_item)
        if worker_list:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key='scale-label',
                                      label_value='app-scale')
        return True
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() == 'vsphere':
        log.info('Running pgsql on existing worker nodes')
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() == 'baremetal':
        log.info('Running pgsql on existing worker nodes')
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() == 'azure':
        raise UnsupportedPlatformError("Unsupported Platform")