Example #1
def add_new_node_and_label_upi(node_type, num_nodes, mark_for_ocs_label=True, node_conf=None):
    """
    Add a new node for the AWS/VMware UPI platform and label it

    Args:
        node_type (str): Type of node, RHEL or RHCOS
        num_nodes (int): number of nodes to add
        mark_for_ocs_label (bool): True to label the new node with the OCS label
        node_conf (dict): The node configurations.

    Returns:
        bool: True if node addition completed successfully

    """
    node_conf = node_conf or {}
    initial_nodes = tests.helpers.get_worker_nodes()
    from ocs_ci.ocs.platform_nodes import PlatformNodesFactory
    plt = PlatformNodesFactory()
    node_util = plt.get_nodes_platform()
    node_util.create_and_attach_nodes_to_cluster(node_conf, node_type, num_nodes)
    for sample in TimeoutSampler(
        timeout=600, sleep=6, func=tests.helpers.get_worker_nodes
    ):
        if len(sample) == len(initial_nodes) + num_nodes:
            break

    nodes_after_exp = tests.helpers.get_worker_nodes()
    wait_for_nodes_status(
        node_names=nodes_after_exp,
        status=constants.NODE_READY
    )

    new_spun_nodes = list(set(nodes_after_exp) - set(initial_nodes))
    if node_type == constants.RHEL_OS:
        set_selinux_permissions(workers=new_spun_nodes)

    if mark_for_ocs_label:
        node_obj = ocp.OCP(kind='node')
        for new_spun_node in new_spun_nodes:
            node_obj.add_label(
                resource_name=new_spun_node,
                label=constants.OPERATOR_NODE_LABEL
            )
            logging.info(
                f"Successfully labeled {new_spun_node} with OCS storage label"
            )
    return True
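
A minimal usage sketch for the function above; the node_type string and node count are illustrative and may not match the exact constants used elsewhere in ocs-ci:

# Hypothetical call: add two worker nodes and label them for OCS.
added = add_new_node_and_label_upi(
    node_type="RHCOS",
    num_nodes=2,
    mark_for_ocs_label=True,
)
assert added, "Node addition did not complete successfully"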
Example #2
def add_disk_to_node(node_obj, disk_size=None):
    """
    Add a new disk to a node

    Args:
        node_obj (ocs_ci.ocs.resources.ocs.OCS): The node object
        disk_size (int): The size of the new disk to attach. If not specified,
            the disk size will be equal to the size of the previous disk.

    """
    from ocs_ci.ocs.platform_nodes import PlatformNodesFactory

    plt = PlatformNodesFactory()
    node_util = plt.get_nodes_platform()

    if not disk_size:
        pv_objs = get_pv_objs_in_sc(sc_name=constants.LOCAL_BLOCK_RESOURCE)
        disk_size = get_pv_size(pv_objs[-1])

    node_util.create_and_attach_volume(node=node_obj, size=disk_size)
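
A brief usage sketch, assuming get_node_objs (used in Example #5) returns node objects for the given worker name; the node name and disk size are illustrative:

# Hypothetical call: attach a 100 GiB disk to a specific worker node.
worker = get_node_objs(node_names=["compute-0"])[0]
add_disk_to_node(worker, disk_size=100)

# Omitting disk_size falls back to the size of the last PV in the
# LOCAL_BLOCK_RESOURCE storage class, as the function body shows.
add_disk_to_node(worker)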
Example #3
def osd_node_reboot():
    """
    Reboot a worker node that is running an OSD

    Raises:
        AssertionError: in case the ceph-tools pod was not recovered

    """
    nodes = PlatformNodesFactory().get_nodes_platform()
    osd_nodes_names = get_osd_running_nodes()
    osd_node_to_reboot = list()
    for node in get_nodes():
        node_name = get_node_name(node)
        if node_name == osd_nodes_names[0]:
            osd_node_to_reboot.append(node)
    log.info(f"Rebooting OSD node: {get_node_name(osd_node_to_reboot[0])}")
    nodes.restart_nodes(osd_node_to_reboot)

    log.info("Sleeping 320 seconds")
    time.sleep(320)
    assert (
        wait_for_ct_pod_recovery()
    ), "Ceph tools pod failed to come up on another node"
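
A follow-up check one might add after the reboot, reusing wait_for_nodes_status and constants.NODE_READY from Example #1; this sketch is not part of the original function:

# Optional sketch: wait for all workers to report Ready again after the reboot.
wait_for_nodes_status(
    node_names=tests.helpers.get_worker_nodes(),
    status=constants.NODE_READY,
)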
Example #4
def worker_node_shutdown(abrupt):
    """
    Shut down the worker node that is running the ocs-operator pod

    Args:
        abrupt (bool): True for an abrupt shutdown (the node is started again at the end), False for a permanent shutdown

    Raises:
        AssertionError: in case the ceph-tools pod was not recovered

    """

    nodes = PlatformNodesFactory().get_nodes_platform()
    log.info(f"Abrupt shutdown: {abrupt}")
    # get ocs-operator node:
    ocs_operator_node_name = get_ocs_operator_node_name()

    # get workers node objects:
    node_to_shutdown = list()
    for node in get_nodes():
        node_name = get_node_name(node)
        log.info(f"node: {node_name}, ocs operator node: {ocs_operator_node_name}")
        if node_name == ocs_operator_node_name:
            node_to_shutdown.append(node)
            log.info(f"node to shutdown: {get_node_name(node_to_shutdown[0])}")
            nodes.stop_nodes(node_to_shutdown)
            log.info("stop instance - done!")
            break

    log.info("Sleeping 320 seconds")
    time.sleep(320)
    assert (
        wait_for_ct_pod_recovery()
    ), "Ceph tools pod failed to come up on another node"
    if abrupt:
        log.info("Abrupt Shutdown")
        if node_to_shutdown:
            nodes.start_nodes(nodes=node_to_shutdown)
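
A usage sketch contrasting the two values of abrupt, following the behavior in the function body above:

# Hypothetical call. With abrupt=True the stopped node is started again once
# the ceph-tools pod has recovered on another node; with abrupt=False the node
# is left shut down.
worker_node_shutdown(abrupt=True)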
Example #5
def delete_and_create_osd_node_vsphere_upi_lso(osd_node_name, use_existing_node=False):
    """
    Unschedule, drain, and delete the osd node, and create a new osd node.
    At the end of the function there should be the same number of osd nodes as
    at the beginning, and Ceph health should be OK.
    This function is for vSphere UPI with LSO.

    Args:
        osd_node_name (str): the name of the osd node
        use_existing_node (bool): If False, create a new node and label it.
            If True, use an existing node to replace the deleted node
            and label it.

    Returns:
        str: The new node name

    """
    from ocs_ci.ocs.platform_nodes import PlatformNodesFactory
    from ocs_ci.ocs.resources.storage_cluster import get_osd_size

    sc_name = constants.LOCAL_BLOCK_RESOURCE
    old_pv_objs = get_pv_objs_in_sc(sc_name)

    osd_node = get_node_objs(node_names=[osd_node_name])[0]
    osd_pod = get_node_pods(osd_node_name, pods_to_search=pod.get_osd_pods())[0]
    osd_id = pod.get_osd_pod_id(osd_pod)
    log.info(f"osd id to remove = {osd_id}")
    # Save the node hostname before deleting the node
    osd_node_hostname_label = get_node_hostname_label(osd_node)

    log.info("Scale down node deployments...")
    scale_down_deployments(osd_node_name)
    log.info("Scale down deployments finished successfully")

    new_node_name = delete_and_create_osd_node_vsphere_upi(
        osd_node_name, use_existing_node
    )
    assert new_node_name, "Failed to create a new node"
    log.info(f"New node created successfully. Node name: {new_node_name}")

    # If we use LSO, we need to create and attach a new disk manually
    new_node = get_node_objs(node_names=[new_node_name])[0]
    plt = PlatformNodesFactory()
    node_util = plt.get_nodes_platform()
    osd_size = get_osd_size()
    log.info(
        f"Create a new disk with size {osd_size}, and attach to node {new_node_name}"
    )
    node_util.create_and_attach_volume(node=new_node, size=osd_size)

    new_node_hostname_label = get_node_hostname_label(new_node)
    log.info(
        "Replace the old node with the new worker node in localVolumeDiscovery and localVolumeSet"
    )
    res = add_new_node_to_lvd_and_lvs(
        old_node_name=osd_node_hostname_label,
        new_node_name=new_node_hostname_label,
    )
    assert res, "Failed to add the new node to LVD and LVS"

    log.info("Verify new pv is available...")
    is_new_pv_available = verify_new_pv_available_in_sc(old_pv_objs, sc_name)
    assert is_new_pv_available, "New pv is not available"
    log.info("Finished verifying that the new pv is available")

    osd_removal_job = pod.run_osd_removal_job(osd_id)
    assert osd_removal_job, "ocs-osd-removal failed to create"
    is_completed = pod.verify_osd_removal_job_completed_successfully(osd_id)
    assert is_completed, "ocs-osd-removal-job is not in status 'completed'"
    log.info("ocs-osd-removal-job completed successfully")

    expected_num_of_deleted_pvs = 1
    num_of_deleted_pvs = delete_released_pvs_in_sc(sc_name)
    assert (
        num_of_deleted_pvs == expected_num_of_deleted_pvs
    ), f"num of deleted PVs is {num_of_deleted_pvs} instead of {expected_num_of_deleted_pvs}"
    log.info("Successfully deleted old pv")

    is_deleted = pod.delete_osd_removal_job(osd_id)
    assert is_deleted, "Failed to delete ocs-osd-removal-job"
    log.info("ocs-osd-removal-job deleted successfully")

    return new_node_name
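
A usage sketch that reuses get_osd_running_nodes from Example #3 to pick the node to replace; choosing the first OSD node is illustrative:

# Hypothetical call: replace the first node currently running an OSD.
osd_node_name = get_osd_running_nodes()[0]
new_node_name = delete_and_create_osd_node_vsphere_upi_lso(
    osd_node_name, use_existing_node=False
)
log.info(f"Replacement node: {new_node_name}")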