def test_add_node_cleanup(self):
    """
    Test to cleanup possible resources created in TestAddNode class.

    Reads the namespace and saved pod/pvc object files from SCALE_DATA_FILE,
    deletes those resources via ``oc delete -f``, then removes the custom
    machinesets created for the scale test.

    Raises:
        FileNotFoundError: when SCALE_DATA_FILE does not exist

    """
    # Get info from SCALE_DATA_FILE for validation
    if not os.path.exists(SCALE_DATA_FILE):
        # Raise with an explicit message instead of a bare exception class
        # so the failure is actionable in the test report.
        raise FileNotFoundError(f"Scale data file not found: {SCALE_DATA_FILE}")
    file_data = templating.load_yaml(SCALE_DATA_FILE)
    namespace = file_data.get("NAMESPACE")
    pod_obj_file_list = file_data.get("POD_OBJ_FILE_LIST")
    pvc_obj_file_list = file_data.get("PVC_OBJ_FILE_LIST")

    ocs_obj = OCP(namespace=namespace)
    # Delete pods and pvcs - both lists use the identical delete-by-file
    # command, so handle them in one loop (guard against missing keys).
    for obj_file in (pod_obj_file_list or []) + (pvc_obj_file_list or []):
        obj_file_path = f"{log_path}/{obj_file}"
        cmd_str = f"delete -f {obj_file_path}"
        ocs_obj.exec_oc_cmd(command=cmd_str)

    # Delete machinesets created for the scale test (names contain "app")
    for obj in machine.get_machineset_objs():
        if "app" in obj.name:
            machine.delete_custom_machineset(obj.name)
def collect_ocs_logs(dir_name):
    """
    Collects OCS logs

    Args:
        dir_name (str): directory name to store OCS logs. Logs will be stored
            in dir_name suffix with _ocs_logs.

    """
    # Destination root: <log_dir>/failed_testcase_ocs_logs_<run_id>
    log_dir_path = os.path.join(
        os.path.expanduser(ocsci_config.RUN['log_dir']),
        f"failed_testcase_ocs_logs_{ocsci_config.RUN['run_id']}",
    )
    must_gather_img = ocsci_config.REPORTING['must_gather_image']
    must_gather_timeout = ocsci_config.REPORTING.get('must_gather_timeout', 600)
    log.info(f"Must gather image: {must_gather_img} will be used.")
    create_directory_path(log_dir_path)
    dump_dir = os.path.join(log_dir_path, f"{dir_name}_ocs_logs")
    gather_cmd = (
        f"adm must-gather --image={must_gather_img} --dest-dir={dump_dir}"
    )
    log.info(f"OCS logs will be placed in location {dump_dir}")
    oc_client = OCP()
    try:
        oc_client.exec_oc_cmd(
            gather_cmd, out_yaml_format=False, timeout=must_gather_timeout
        )
    except CommandFailed as ex:
        log.error(f"Failed during must gather logs! Error: {ex}")
    except TimeoutExpired as ex:
        log.error(
            f"Timeout {must_gather_timeout}s for must-gather reached, command"
            f" exited with error: {ex}"
        )
def assign_dummy_zones(zones, nodes, overwrite=False):
    """
    Assign node labels to given nodes based on given zone lists. Zones are
    assigned so that there is the same number of nodes in each zone.

    Args:
        zones (list[str]): list of k8s zone names
        nodes (list[str]): list of node names to label
        overwrite (bool): if True, labeling will not fail on already defined
            zone labels (False by default)

    Raises:
        ValueError: when number of nodes is not divisible by number of zones

    """
    if len(nodes) % len(zones):
        msg = "number of nodes is not divisible by number of zones"
        logger.error(msg)
        raise ValueError(msg)
    repeats = len(nodes) // len(zones)
    node_ocp = OCP(kind="node")
    # Repeat the zone list so zip pairs every node with a zone, round-robin.
    for node_name, zone_name in zip(nodes, zones * repeats):
        logger.info(
            "labeling node %s with %s=%s", node_name, ZONE_LABEL, zone_name
        )
        label_cmd = f"label node {node_name} {ZONE_LABEL}={zone_name}"
        if overwrite:
            label_cmd = f"{label_cmd} --overwrite"
        node_ocp.exec_oc_cmd(command=label_cmd)
def update_subscription(self, channel):
    """
    Updating OCS operator subscription

    Args:
        channel: (str): OCS subscription channel

    """
    # From ODF 4.9 the subscription resource is named differently.
    if version.get_semantic_ocs_version_from_config() >= version.VERSION_4_9:
        subscription_name = constants.ODF_SUBSCRIPTION
    else:
        subscription_name = constants.OCS_SUBSCRIPTION
    subscription = OCP(
        resource_name=subscription_name,
        kind="subscription",
        namespace=config.ENV_DATA["cluster_namespace"],
    )
    current_ocs_source = subscription.data["spec"]["source"]
    log.info(f"Current OCS subscription source: {current_ocs_source}")
    if self.upgrade_in_current_source:
        ocs_source = current_ocs_source
    else:
        ocs_source = constants.OPERATOR_CATALOG_SOURCE_NAME
    patch_cmd = (
        f"patch subscription {subscription_name} "
        f'-n {self.namespace} --type merge -p \'{{"spec":{{"channel": '
        f'"{channel}", "source": "{ocs_source}"}}}}\''
    )
    subscription.exec_oc_cmd(patch_cmd, out_yaml_format=False)
def run_must_gather(log_dir_path, image, command=None):
    """
    Runs the must-gather tool against the cluster

    Args:
        log_dir_path (str): directory for dumped must-gather logs
        image (str): must-gather image registry path
        command (str): optional command to execute within the must-gather image

    """
    must_gather_timeout = ocsci_config.REPORTING.get('must_gather_timeout', 600)
    log.info(f"Must gather image: {image} will be used.")
    create_directory_path(log_dir_path)
    gather_cmd = f"adm must-gather --image={image} --dest-dir={log_dir_path}"
    if command:
        # Append an optional command to run inside the must-gather image.
        gather_cmd = f"{gather_cmd} -- {command}"
    log.info(f"OCS logs will be placed in location {log_dir_path}")
    oc_client = OCP()
    try:
        oc_client.exec_oc_cmd(
            gather_cmd, out_yaml_format=False, timeout=must_gather_timeout
        )
    except CommandFailed as ex:
        log.error(f"Failed during must gather logs! Error: {ex}")
    except TimeoutExpired as ex:
        log.error(
            f"Timeout {must_gather_timeout}s for must-gather reached, command"
            f" exited with error: {ex}"
        )
def update_subscription(self, channel):
    """
    Updating OCS operator subscription

    Args:
        channel: (str): OCS subscription channel

    """
    subscription = OCP(
        resource_name=constants.OCS_SUBSCRIPTION,
        kind="subscription",
        namespace=config.ENV_DATA["cluster_namespace"],
    )
    current_ocs_source = subscription.data["spec"]["source"]
    log.info(f"Current OCS subscription source: {current_ocs_source}")
    if self.upgrade_in_current_source:
        ocs_source = current_ocs_source
    else:
        ocs_source = constants.OPERATOR_CATALOG_SOURCE_NAME
    patch_cmd = (
        f"patch subscription {constants.OCS_SUBSCRIPTION} "
        f'-n {self.namespace} --type merge -p \'{{"spec":{{"channel": '
        f'"{channel}", "source": "{ocs_source}"}}}}\''
    )
    subscription.exec_oc_cmd(patch_cmd, out_yaml_format=False)
    # Manual install plans must be approved explicitly after the patch.
    plan_approval = config.DEPLOYMENT.get("subscription_plan_approval")
    if plan_approval == "Manual":
        wait_for_install_plan_and_approve(self.namespace)
def wait_for_jenkins_deploy_status(self, status, timeout=600):
    """
    Wait for jenkins deploy pods status to reach running/completed

    Args:
        status (str): status to reach Running or Completed
        timeout (int): Time in seconds to wait

    """
    log.info(f"Waiting for jenkins-deploy pods to be reach {status} state")
    for project in self.projects:
        for deploy_pod in self.get_jenkins_deploy_pods(namespace=project):
            try:
                wait_for_resource_state(
                    resource=deploy_pod, state=status, timeout=timeout
                )
            except ResourceWrongStatusException:
                # Collect pod logs and describe output to aid debugging
                # before failing with UnexpectedBehaviour.
                ocp_obj = OCP(namespace=project)
                output_log = ocp_obj.exec_oc_cmd(
                    command=f'logs {deploy_pod.name}', out_yaml_format=False
                )
                output_describe = ocp_obj.exec_oc_cmd(
                    command=f'describe {deploy_pod.name}',
                    out_yaml_format=False,
                )
                error_msg = (
                    f'{deploy_pod.name} did not reach to '
                    f'{status} state after {timeout} sec'
                    f'\n output log {deploy_pod.name}:\n{output_log}'
                    f'\n output describe {deploy_pod.name}:\n{output_describe}'
                )
                log.error(error_msg)
                raise UnexpectedBehaviour(error_msg)
def increase_pods_per_worker_node_count(pods_per_node=500, pods_per_core=10):
    """
    Function to increase pods per node count, default OCP supports 250 pods per
    node, from OCP 4.6 limit is going to be 500, but using this function can
    override this param to create more pods per worker nodes.
    more detail:
    https://docs.openshift.com/container-platform/4.5/nodes/nodes/nodes-nodes-managing-max-pods.html

    Example: The default value for podsPerCore is 10 and the default value for
    maxPods is 250. This means that unless the node has 25 cores or more, by
    default, podsPerCore will be the limiting factor.

    WARN: This function will perform Unscheduling of workers and reboot so
    Please aware if there is any non-dc pods then expected to be terminated.

    Args:
        pods_per_node (int): Pods per node limit count
        pods_per_core (int): Pods per core limit count

    Raise:
        UnexpectedBehaviour if machineconfigpool not in Updating state
        within 40secs.

    """
    kubelet_template = templating.load_yaml(constants.PODS_PER_NODE_COUNT_YAML)
    kubelet_template["spec"]["kubeletConfig"]["podsPerCore"] = pods_per_core
    kubelet_template["spec"]["kubeletConfig"]["maxPods"] = pods_per_node

    # Create new max-pods label
    max_pods_obj = OCS(**kubelet_template)
    assert max_pods_obj.create()

    # Apply the changes in the workers
    ocp = OCP()
    assert ocp.exec_oc_cmd(
        command="label machineconfigpool worker custom-kubelet=small-pods"
    )

    # First wait for Updating status to become True, default it will be False
    # & machine_count and ready_machine_count will be equal
    get_cmd = "get machineconfigpools -o yaml"
    attempts = 0
    while True:
        mcp_output = ocp.exec_oc_cmd(command=get_cmd)
        # items[1] is the worker pool; conditions[4] is the Updating condition
        worker_updating = (
            mcp_output.get("items")[1]
            .get("status")
            .get("conditions")[4]
            .get("status")
        )
        if worker_updating == "True":
            break
        if attempts >= 8:
            raise UnexpectedBehaviour(
                "After 40sec machineconfigpool not in Updating state"
            )
        logging.info("Sleep 5secs for updating status change")
        attempts += 1
        time.sleep(5)

    # Validate the change was applied successfully
    mcp_output = ocp.exec_oc_cmd(command=get_cmd)
    machine_count = mcp_output.get("items")[1].get("status").get("machineCount")
    # During manual execution observed each node took 240+ sec for update
    utils.wait_for_machineconfigpool_status(
        node_type=constants.WORKER_MACHINE, timeout=machine_count * 300
    )
def run_must_gather(log_dir_path, image, command=None):
    """
    Runs the must-gather tool against the cluster

    Args:
        log_dir_path (str): directory for dumped must-gather logs
        image (str): must-gather image registry path
        command (str): optional command to execute within the must-gather image

    """
    # Must-gather has many changes on 4.6 which add more time to the collection.
    # https://github.com/red-hat-storage/ocs-ci/issues/3240
    # BUG FIX: the version was previously compared as a float, but
    # float("4.10") == 4.1 < 4.6, which wrongly selected the short timeout
    # for two-digit minor releases. Compare (major, minor) tuples instead.
    major, minor = (
        int(part)
        for part in str(ocsci_config.ENV_DATA["ocs_version"]).split(".")[:2]
    )
    timeout = 1500 if (major, minor) >= (4, 6) else 600
    must_gather_timeout = ocsci_config.REPORTING.get("must_gather_timeout", timeout)

    log.info(f"Must gather image: {image} will be used.")
    create_directory_path(log_dir_path)
    cmd = f"adm must-gather --image={image} --dest-dir={log_dir_path}"
    if command:
        cmd += f" -- {command}"
    log.info(f"OCS logs will be placed in location {log_dir_path}")
    occli = OCP()
    try:
        occli.exec_oc_cmd(cmd, out_yaml_format=False, timeout=must_gather_timeout)
    except CommandFailed as ex:
        log.error(f"Failed during must gather logs! Error: {ex}")
    except TimeoutExpired as ex:
        log.error(
            f"Timeout {must_gather_timeout}s for must-gather reached, command"
            f" exited with error: {ex}"
        )
def create_dummy_osd(deployment):
    """
    Replace one of OSD pods with pod that contains all data from original
    OSD but doesn't run osd daemon. This can be used e.g. for direct
    acccess to Ceph Placement Groups.

    Args:
        deployment (str): Name of deployment to use

    Returns:
        list: first item is dummy deployment object, second item is dummy pod
            object
    """
    oc = OCP(
        kind=constants.DEPLOYMENT,
        namespace=config.ENV_DATA.get('cluster_namespace')
    )
    # Copy the original OSD deployment spec and rename it, so the dummy can
    # be created alongside the original.
    osd_data = oc.get(deployment)
    dummy_deployment = create_unique_resource_name('dummy', 'osd')
    osd_data['metadata']['name'] = dummy_deployment

    osd_containers = osd_data.get('spec').get('template').get('spec').get(
        'containers'
    )
    # get osd container spec - the original args are preserved so the ceph
    # init command can be replayed manually inside the dummy pod below.
    original_osd_args = osd_containers[0].get('args')
    # Neutralize the container entrypoint: no args, and sleep forever instead
    # of running the osd daemon.
    osd_data['spec']['template']['spec']['containers'][0]['args'] = []
    osd_data['spec']['template']['spec']['containers'][0]['command'] = [
        '/bin/bash', '-c', 'sleep infinity'
    ]
    # Dump the modified spec to a temp file and create the dummy deployment
    # from it (delete=False so the file survives for `oc create`).
    osd_file = tempfile.NamedTemporaryFile(
        mode='w+', prefix=dummy_deployment, delete=False
    )
    with open(osd_file.name, "w") as temp:
        yaml.dump(osd_data, temp)
    oc.create(osd_file.name)

    # downscale the original deployment and start dummy deployment instead
    oc.exec_oc_cmd(f"scale --replicas=0 deployment/{deployment}")
    oc.exec_oc_cmd(f"scale --replicas=1 deployment/{dummy_deployment}")

    osd_list = pod.get_osd_pods()
    # Find the pod spawned by the dummy deployment and wait for it to run.
    dummy_pod = [pod for pod in osd_list if dummy_deployment in pod.name][0]
    wait_for_resource_state(
        resource=dummy_pod, state=constants.STATUS_RUNNING, timeout=60
    )
    # Replay the ceph init sequence with the original osd args under tini.
    ceph_init_cmd = '/rook/tini' + ' ' + ' '.join(original_osd_args)
    try:
        logger.info('Following command should expire after 7 seconds')
        dummy_pod.exec_cmd_on_pod(ceph_init_cmd, timeout=7)
    except TimeoutExpired:
        # The init command is expected to time out; kill the leftover tini
        # process. A failing kill is tolerated (best-effort cleanup).
        logger.info('Killing /rook/tini process')
        try:
            dummy_pod.exec_bash_cmd_on_pod(
                "kill $(ps aux | grep '[/]rook/tini' | awk '{print $2}')"
            )
        except CommandFailed:
            pass
    return dummy_deployment, dummy_pod
def drain_nodes(node_names):
    """
    Drain nodes

    Args:
        node_names (list): The names of the nodes

    """
    ocp = OCP(kind='node')
    # BUG FIX: the previous code assigned the return value of print()
    # (always None) to node_names, so the command executed was the literal
    # "adm drain None". Join the node names into one space-separated string.
    node_names_str = ' '.join(node_names)
    ocp.exec_oc_cmd(f"adm drain {node_names_str}")
def start_build(self):
    """
    Start build on jenkins

    """
    # Trigger num_of_builds builds in every configured project.
    build_cmd = f"start-build {constants.JENKINS_BUILD}"
    for project in self.projects:
        for build_num in range(1, self.num_of_builds + 1):
            log.info(
                f"Start Jenkins build on {project} project, build number:{build_num} "
            )
            project_ocp = OCP(namespace=project)
            project_ocp.exec_oc_cmd(command=build_cmd, out_yaml_format=False)
def create_app_jenkins(self):
    """
    create application jenkins

    """
    # Deploy the persistent jenkins template into each configured project.
    new_app_cmd = 'new-app --name=jenkins-ocs-rbd --template=jenkins-persistent-ocs'
    for project in self.projects:
        log.info(f'create app jenkins on project {project}')
        project_ocp = OCP(namespace=project)
        project_ocp.new_project(project)
        project_ocp.exec_oc_cmd(command=new_app_cmd, out_yaml_format=False)
def add_annotation_to_machine(annotation, machine_name):
    """
    Add annotation to the machine

    Args:
        annotation (str): Annotation to be set on the machine
            eg: annotation = "machine.openshift.io/exclude-node-draining=''"
        machine_name (str): machine name

    """
    machine_ocp = OCP(
        kind="machine",
        namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE,
    )
    annotate_cmd = f"annotate machine {machine_name} {annotation}"
    log.info(f"Adding annotation: {annotate_cmd} to machine {machine_name} ")
    machine_ocp.exec_oc_cmd(annotate_cmd)
def drain_nodes(node_names):
    """
    Drain nodes

    Args:
        node_names (list): The names of the nodes

    """
    node_ocp = OCP(kind='node')
    joined_names = ' '.join(node_names)
    log.info(f'Draining nodes {joined_names}')
    node_ocp.exec_oc_cmd(f"adm drain {joined_names}")
def retrive_files_from_pod(pod_name, localpath, remotepath):
    """
    Download a file from pod

    Args:
        pod_name (str): Name of the pod
        localpath (str): Local file to download
        remotepath (str): Target path on the pod

    """
    # Expand ~ in the local path before handing it to `oc cp`.
    local_target = os.path.expanduser(localpath)
    ocp_obj = OCP()
    ocp_obj.exec_oc_cmd(command=f'cp {pod_name}:{remotepath} {local_target}')
def verify_running_acm():
    """
    Detect ACM and its version on Cluster

    """
    mch_cmd = OCP(namespace=ACM_NAMESPACE)
    phase = mch_cmd.exec_oc_cmd(
        "get mch -o jsonpath='{.items[].status.phase}'",
        out_yaml_format=False,
    )
    assert phase == "Running", f"ACM status is {phase}"
    current_version = mch_cmd.exec_oc_cmd(
        "get mch -o jsonpath='{.items[].status.currentVersion}'",
        out_yaml_format=False,
    )
    log.info(f"ACM Version Detected: {current_version}")
def schedule_nodes(node_names):
    """
    Change nodes to be scheduled

    Args:
        node_names (list): The names of the nodes

    """
    node_ocp = OCP(kind='node')
    joined_names = ' '.join(node_names)
    node_ocp.exec_oc_cmd(f"adm uncordon {joined_names}")
    log.info(f"Scheduling nodes {joined_names}")
    wait_for_nodes_status(node_names)
def enable_console_plugin():
    """
    Enables console plugin for ODF

    """
    # Guard clauses preserve the original short-circuit: the env flag is only
    # read when the cluster is at least 4.9.
    if version.get_semantic_ocs_version_from_config() < version.VERSION_4_9:
        return
    if not ocsci_config.ENV_DATA["enable_console_plugin"]:
        return
    log.info("Enabling console plugin")
    ocp_obj = OCP()
    patch = '\'[{"op": "add", "path": "/spec/plugins", "value": ["odf-console"]}]\''
    patch_cmd = (
        f"patch console.operator cluster -n {constants.OPENSHIFT_STORAGE_NAMESPACE}"
        f" --type json -p {patch}"
    )
    ocp_obj.exec_oc_cmd(command=patch_cmd)
def add_node(machine_set, count):
    """
    Add new node to the cluster

    Args:
        machine_set (str): Name of a machine set to get increase replica count
        count (int): Count to increase

    Returns:
        bool: True if commands executes successfully

    """
    machine_api_ocp = OCP(namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE)
    scale_cmd = f"scale --replicas={count} machinesets {machine_set}"
    machine_api_ocp.exec_oc_cmd(scale_cmd)
    return True
def drain_nodes(node_names):
    """
    Drain nodes

    Args:
        node_names (list): The names of the nodes

    """
    node_ocp = OCP(kind='node')
    joined_names = ' '.join(node_names)
    log.info(f'Draining nodes {joined_names}')
    drain_cmd = (
        f"adm drain {joined_names} --force=true --ignore-daemonsets "
        f"--delete-local-data"
    )
    node_ocp.exec_oc_cmd(drain_cmd)
def set_replica_count(machine_set, count):
    """
    Change the replica count of a machine set.

    Args:
        machine_set (str): Name of the machine set
        count (int): The number of the new replica count

    Returns:
        bool: True if the change was made successfully. False otherwise

    """
    machine_api_ocp = OCP(namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE)
    scale_cmd = f"scale --replicas={count} machinesets {machine_set}"
    machine_api_ocp.exec_oc_cmd(scale_cmd)
    return True
def unschedule_nodes(node_names):
    """
    Change nodes to be unscheduled

    Args:
        node_names (list): The names of the nodes

    """
    node_ocp = OCP(kind='node')
    joined_names = ' '.join(node_names)
    log.info(f"Unscheduling nodes {joined_names}")
    node_ocp.exec_oc_cmd(f"adm cordon {joined_names}")
    wait_for_nodes_status(
        node_names, status=constants.NODE_READY_SCHEDULING_DISABLED
    )
def finalizer():
    # Restore the noobaa operator deployment to a single replica if the test
    # changed its scale.
    deploy_obj = OCP(
        kind=constants.DEPLOYMENT,
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    deployment_data = deploy_obj.get(
        resource_name=constants.NOOBAA_OPERATOR_DEPLOYMENT
    )
    if deployment_data["spec"]["replicas"] != 1:
        logger.info(
            f"Scaling back {constants.NOOBAA_OPERATOR_DEPLOYMENT} deployment to replica: 1"
        )
        deploy_obj.exec_oc_cmd(
            f"scale deployment {constants.NOOBAA_OPERATOR_DEPLOYMENT} --replicas=1"
        )
def unschedule_nodes(node_names):
    """
    Change nodes to be unscheduled

    Args:
        node_names (list): The names of the nodes

    """
    node_ocp = OCP(kind='node')
    # Cordon each node with its own oc command (one call per node).
    for name in node_names:
        node_ocp.exec_oc_cmd(f"adm cordon {name}")
    wait_for_nodes_status(
        node_names,
        status=constants.NODE_READY_SCHEDULING_DISABLED,
    )
def get_pod_name_by_pattern(
    pattern="client",
    namespace=None,
    filter=None,
):
    """
    In a given namespace find names of the pods that match the given pattern

    Args:
        pattern (str): name of the pod with given pattern
        namespace (str): Namespace value
        filter (str): pod name to filter from the list

    Returns:
        pod_list (list): List of pod names matching the pattern

    """
    if not namespace:
        namespace = ocsci_config.ENV_DATA["cluster_namespace"]
    ocp_obj = OCP(kind="pod", namespace=namespace)
    raw_output = ocp_obj.exec_oc_cmd("get pods -o name", out_yaml_format=False)
    matching_pods = []
    for full_name in raw_output.split("\n"):
        # Lines matching `filter` are excluded; remaining lines matching
        # `pattern` are collected without the "pod/" prefix.
        if filter is not None and re.search(filter, full_name):
            log.info(f"Pod name filtered {full_name}")
            continue
        if re.search(pattern, full_name):
            (_, short_name) = full_name.split("/")
            log.info(f"pod name match found appending {short_name}")
            matching_pods.append(short_name)
    return matching_pods
def validate_cluster_import(cluster_name):
    """
    Validate ACM status of managed cluster

    Args:
        cluster_name: (str): cluster name to validate

    Assert:
        All conditions of selected managed cluster should be "True",
        Failed otherwise

    Return:
        True, if not AssertionError

    """
    config.switch_ctx(0)
    managed_clusters = OCP(kind=ACM_MANAGED_CLUSTERS)
    conditions = managed_clusters.exec_oc_cmd(
        f"get managedclusters {cluster_name} -ojsonpath='{{.status.conditions}}'"
    )
    log.debug(conditions)
    for condition in conditions:
        log.info(f"Message: {condition.get('message')}")
        log.info(f"Status: {condition.get('status')}")
        status = condition.get("status")
        assert status == "True", f"Status is not True, but: {status}"
    # Return true if Assertion error was not raised:
    return True
def get_new_device_paths(device_sets_required, osd_size_capacity_requested):
    """
    Get new device paths to add capacity over Baremetal cluster

    Args:
        device_sets_required (int) : Count of device sets to be added
        osd_size_capacity_requested (int) : Requested OSD size capacity

    Returns:
        cur_device_list (list) : List containing added device paths

    """
    ocp_obj = OCP()
    workers = get_typed_nodes(node_type="worker")
    worker_names = [worker.name for worker in workers]
    output = ocp_obj.exec_oc_cmd(
        "get localvolume local-block -n local-storage -o yaml"
    )
    cur_device_list = output["spec"]["storageClassDevices"][0]["devicePaths"]
    # Clone the helper repo whose playbook generates device-by-id paths.
    path = os.path.join(constants.EXTERNAL_DIR, "device-by-id-ocp")
    utils.clone_repo(constants.OCP_QE_DEVICEPATH_REPO, path)
    os.chdir(path)
    utils.run_cmd("ansible-playbook devices_by_id.yml")
    # How many device paths still need to be taken from each of the first
    # three workers (replaces the three copy-pasted per-worker counters).
    devices_remaining = {
        name: device_sets_required for name in worker_names[:3]
    }
    with open("local-storage-block.yaml", "r") as cloned_file, open(
        "local-block.yaml", "w"
    ) as our_file:
        # Copy everything up to and including the "devicePaths:" line.
        cur_line = cloned_file.readline()
        while "devicePaths:" not in cur_line:
            our_file.write(cur_line)
            cur_line = cloned_file.readline()
        our_file.write(cur_line)
        cur_line = cloned_file.readline()
        # Add required number of device path from each node, skipping any
        # path already present in the current device list.
        while cur_line:
            if str(osd_size_capacity_requested) in cur_line:
                for worker_name, remaining in devices_remaining.items():
                    if (
                        remaining
                        and worker_name in cur_line
                        and not any(s in cur_line for s in cur_device_list)
                    ):
                        our_file.write(cur_line)
                        devices_remaining[worker_name] = remaining - 1
            cur_line = cloned_file.readline()
    # BUG FIX: the yaml file handle was previously opened without ever being
    # closed; use a context manager so it is released deterministically.
    with open("local-block.yaml") as local_block_yaml:
        lvcr = yaml.load(local_block_yaml, Loader=yaml.FullLoader)
    new_dev_paths = lvcr["spec"]["storageClassDevices"][0]["devicePaths"]
    log.info(f"Newly added devices are: {new_dev_paths}")
    assert len(new_dev_paths) == (len(worker_names) * device_sets_required), (
        f"Current devices available = {len(new_dev_paths)}"
    )
    os.chdir(constants.TOP_DIR)
    shutil.rmtree(path)
    cur_device_list.extend(new_dev_paths)
    return cur_device_list
def cleanup(self):
    """
    Clean up

    """
    # Remove every jenkins project, then the shared template.
    for project in self.projects:
        log.info(f"Delete Jenkins project: {project}")
        project_ocp = OCP(namespace=project)
        project_ocp.delete_project(project)

    log.info("Delete Jenkins Template")
    template_cmd = (
        "delete template.template.openshift.io/jenkins-persistent-ocs -n openshift"
    )
    OCP().exec_oc_cmd(command=template_cmd, out_yaml_format=False)
    # Wait for the resources to delete
    # https://github.com/red-hat-storage/ocs-ci/issues/2417
    time.sleep(120)
def add_storage_capacity(capacity, storagecluster_name, namespace=defaults.ROOK_CLUSTER_NAMESPACE):
    """
    Add storage capacity to the cluster

    Args:
        capacity (str): Size of the storage
        storagecluster_name (str): Name of a storage cluster

    Returns:
        bool: True if commands executes successfully

    """
    ocp = OCP(namespace=namespace)
    # ToDo Update patch command with pr https://github.com/red-hat-storage/ocs-ci/pull/803
    patch_cmd = f''' patch storagecluster/{storagecluster_name} --type='json' -p='[{{"op": "replace", "path": "/spec/storageDeviceSets/0/dataPVCTemplate/spec/resources/requests/storage", "value":{capacity}}}]' '''
    ocp.exec_oc_cmd(patch_cmd)
    return True