def delete_obc(self):
    """
    Cleanup OBC resources created above
    """
    if config.ENV_DATA["platform"] in constants.ON_PREM_PLATFORMS:
        logger.info(f"Deleting rgw obc {self.obc_rgw}")
        obcrgw = OCP(kind="ObjectBucketClaim", resource_name=f"{self.obc_rgw}")
        run_cmd(f"oc delete obc/{self.obc_rgw}")
        obcrgw.wait_for_delete(resource_name=f"{self.obc_rgw}", timeout=300)

    logger.info(f"Deleting mcg obc {self.obc_mcg}")
    obcmcg = OCP(kind="ObjectBucketClaim", resource_name=f"{self.obc_mcg}")
    run_cmd(
        f"oc delete obc/{self.obc_mcg} -n {defaults.ROOK_CLUSTER_NAMESPACE}"
    )
    obcmcg.wait_for_delete(resource_name=f"{self.obc_mcg}", timeout=300)
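# Rough CLI equivalent of the cleanup above, for reference only (the bucket
# claim names and the namespace are hypothetical placeholders, not values
# taken from the code):
#
#   oc delete obc/<rgw-obc-name>
#   oc delete obc/<mcg-obc-name> -n <rook-cluster-namespace>
#   oc get obc -n <rook-cluster-namespace>    # verify the claims are gone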
class ElasticSearch(object): """ ElasticSearch Environment """ def __init__(self): """ Initializer function """ log.info("Initializing the Elastic-Search environment object") self.namespace = "elastic-system" self.eck_file = "ocs_ci/templates/app-pods/eck.1.3.1-all-in-one.yaml" self.dumper_file = "ocs_ci/templates/app-pods/esclient.yaml" self.pvc = "ocs_ci/templates/app-pods/es-pvc.yaml" self.crd = "ocs_ci/templates/app-pods/esq.yaml" # Creating some different types of OCP objects self.ocp = OCP(kind="pod", resource_name="elastic-operator-0", namespace=self.namespace) self.ns_obj = OCP(kind="namespace", namespace=self.namespace) self.es = OCP(resource_name="quickstart-es-http", namespace=self.namespace) self.elasticsearch = OCP(namespace=self.namespace, kind="elasticsearch") self.password = OCP( kind="secret", resource_name="quickstart-es-elastic-user", namespace=self.namespace, ) # Deploy the ECK all-in-one.yaml file self._deploy_eck() # Deploy the Elastic-Search server self._deploy_es() # Verify that ES is Up & Running timeout = 600 while timeout > 0: if self.get_health(): log.info("The ElasticSearch server is ready !") break else: log.warning("The ElasticSearch server is not ready yet") log.info("going to sleep for 30 sec. before next check") time.sleep(30) timeout -= 30 self._deploy_data_dumper_client() # Connect to the server self.con = self._es_connect() def _deploy_eck(self): """ Deploying the ECK environment for the Elasticsearch, and make sure it is in Running mode """ log.info("Deploying the ECK environment for the ES cluster") self.ocp.apply(self.eck_file) for es_pod in TimeoutSampler(300, 10, get_pod_name_by_pattern, "elastic-operator", self.namespace): try: if es_pod[0] is not None: self.eckpod = es_pod[0] log.info(f"The ECK pod {self.eckpod} is ready !") break except IndexError: log.info("ECK operator pod not ready yet") def _deploy_data_dumper_client(self): """ Deploying elastic search client pod with utility which dump all the data from the server to .tgz file """ log.info("Deploying the es client for dumping all data") self.ocp.apply(self.dumper_file) for dmp_pod in TimeoutSampler(300, 10, get_pod_name_by_pattern, "es-dumper", self.namespace): try: if dmp_pod[0] is not None: self.dump_pod = dmp_pod[0] log.info( f"The dumper client pod {self.dump_pod} is ready !") break except IndexError: log.info("Dumper pod not ready yet") def get_ip(self): """ This function return the IP address of the Elasticsearch cluster. this IP is to use inside the OCP cluster Return str : String that represent the Ip Address. """ return self.es.get()["spec"]["clusterIP"] def get_port(self): """ This function return the port of the Elasticsearch cluster. Return str : String that represent the port. 
""" return self.es.get()["spec"]["ports"][0]["port"] def _deploy_es(self): log.info("Deploy the PVC for the ElasticSearch cluster") self.ocp.apply(self.pvc) log.info("Deploy the ElasticSearch cluster") self.ocp.apply(self.crd) for es_pod in TimeoutSampler(300, 20, get_pod_name_by_pattern, "quickstart-es-default", self.namespace): try: if es_pod[0] is not None: self.espod = es_pod[0] log.info(f"The ElasticSearch pod {self.espod} Started") break except IndexError: log.info("elasticsearch pod not ready yet") es_pod = OCP(kind="pod", namespace=self.namespace) log.info("Waiting for ElasticSearch to Run") assert es_pod.wait_for_resource( condition=constants.STATUS_RUNNING, resource_name=self.espod, sleep=30, timeout=600, ) log.info("Elastic Search is ready !!!") def get_health(self): """ This method return the health status of the Elasticsearch. Returns: bool : True if the status is green (OK) otherwise - False """ return self.elasticsearch.get( )["items"][0]["status"]["health"] == "green" def get_password(self): """ This method return the password used to connect the Elasticsearch. Returns: str : The password as text """ return base64.b64decode( self.password.get()["data"]["elastic"]).decode("utf-8") def cleanup(self): """ Cleanup the environment from all Elasticsearch components, and from the port forwarding process. """ log.info("Teardown the Elasticsearch environment") log.info("Deleting all resources") log.info("Deleting the dumper client pod") self.ocp.delete(yaml_file=self.dumper_file) log.info("Deleting the es resource") self.ocp.delete(yaml_file=self.crd) log.info("Deleting the es project") self.ns_obj.delete_project(project_name=self.namespace) self.ns_obj.wait_for_delete(resource_name=self.namespace, timeout=180) def _es_connect(self): """ Create a connection to the local ES Returns: Elasticsearch: elasticsearch connection object Raise: ConnectionError: if can not connect to the server """ try: es = Elasticsearch([{ "host": self.get_ip(), "port": self.get_port() }]) except esexp.ConnectionError: log.error("Can not connect to ES server in the LocalServer") raise return es def get_indices(self): """ Getting list of all indices in the ES server - all created by the test, the installation of the ES was without any indexes pre-installed. Returns: list : list of all indices defined in the ES server """ results = [] log.info("Getting all indices") for ind in self.con.indices.get_alias("*"): results.append(ind) return results def _copy(self, es): """ Copy All data from the internal ES server to the main ES. **This is deprecated function** , use the dump function, and load the data from the files for the main ES server Args: es (obj): elasticsearch object which connected to the main ES """ query = {"size": 1000, "query": {"match_all": {}}} for ind in self.get_indices(): log.info(f"Reading {ind} from internal ES server") try: result = self.con.search(index=ind, body=query) except esexp.NotFoundError: log.warning(f"{ind} Not found in the Internal ES.") continue log.debug(f"The results from internal ES for {ind} are :{result}") log.info(f"Writing {ind} into main ES server") for doc in result["hits"]["hits"]: log.debug(f"Going to write : {doc}") es.index(index=ind, doc_type="_doc", body=doc["_source"]) def dumping_all_data(self, target_path): """ Dump All data from the internal ES server to .tgz file. 
        Args:
            target_path (str): the path into which the results file will be copied

        Returns:
            bool: True if the dump operation succeeded and the results data were
                returned to the host, otherwise False

        """
        log.info("Dumping data from the ES server to a .tgz file")
        rsh_cmd = (
            f"rsh {self.dump_pod} /elasticsearch-dump/esdumper.py "
            f"--ip {self.get_ip()} --port {self.get_port()}"
        )
        result = self.ocp.exec_oc_cmd(rsh_cmd, out_yaml_format=False, timeout=1200)
        if "ES dump is done." not in result:
            log.error("There is no data in the Elasticsearch server")
            return False
        else:
            src_file = result.split()[-1]
            log.info(f"Copy {src_file} from the client pod")
            cp_command = f"cp {self.dump_pod}:{src_file} {target_path}/FullResults.tgz"
            result = self.ocp.exec_oc_cmd(cp_command, timeout=120)
            log.info(f"The output from the POD is {result}")
            log.info("Extracting the FullResults.tgz file")
            kwargs = {"cwd": target_path}
            results = run_command(f"tar zxvf {target_path}/FullResults.tgz", **kwargs)
            log.debug(f"The untar results is {results}")
            if "Error in command" in results:
                log.warning("Can not untar the dumped file")
                return False

        return True
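# Illustrative usage sketch of the ElasticSearch helper above (a minimal
# sketch, assuming a cluster where the ECK operator and the quickstart ES
# instance can be deployed; the target path is a hypothetical example):
#
#   es = ElasticSearch()                       # deploys ECK, ES and the dumper pod
#   log.info(f"ES reachable at {es.get_ip()}:{es.get_port()}")
#   log.info(f"Indices created so far: {es.get_indices()}")
#   es.dumping_all_data(target_path="/tmp/es-results")   # writes FullResults.tgz
#   es.cleanup()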
class RipSaw(object):
    """
    Workload operation using RipSaw
    """

    def __init__(self, **kwargs):
        """
        Initializer function

        Args:
            kwargs (dict):
                Following kwargs are valid
                repo: Ripsaw repo to use - a github link
                branch: branch to use from the repo
                namespace: namespace for the operator

        Example Usage:
            r1 = RipSaw()
            r1.apply_crd(crd='ripsaw_v1alpha1_ripsaw_crd.yaml')
            # use oc apply to apply custom modified bench
            my_custom_bench = my_custom_bench.yaml
            run_cmd('oc apply -f my_custom_bench')
        """
        self.args = kwargs
        self.repo = self.args.get('repo', 'https://github.com/cloud-bulldozer/ripsaw')
        self.branch = self.args.get('branch', 'master')
        self.namespace = self.args.get('namespace', RIPSAW_NAMESPACE)
        self.pgsql_is_setup = False
        self.ocp = OCP()
        self.ns_obj = OCP(kind='namespace')
        self.pod_obj = OCP(namespace=RIPSAW_NAMESPACE, kind='pod')
        self._create_namespace()
        self._clone_ripsaw()

    def _create_namespace(self):
        """
        create namespace for RipSaw
        """
        self.ocp.new_project(self.namespace)

    def _clone_ripsaw(self):
        """
        clone the ripsaw repo
        """
        self.dir = tempfile.mkdtemp(prefix='ripsaw_')
        try:
            log.info(f'cloning ripsaw in {self.dir}')
            git_clone_cmd = f'git clone -b {self.branch} {self.repo} '
            run(git_clone_cmd, shell=True, cwd=self.dir, check=True)
            self.crd = 'resources/crds/'
            self.operator = 'resources/operator.yaml'
        except (CommandFailed, CalledProcessError) as cf:
            log.error('Error during cloning of ripsaw repository')
            raise cf

    def apply_crd(self, crd):
        """
        Apply the CRD

        Args:
            crd (str): Name of file to apply
        """
        self.dir += '/ripsaw'
        run('oc apply -f deploy', shell=True, check=True, cwd=self.dir)
        run(f'oc apply -f {crd}', shell=True, check=True, cwd=self.dir)
        run(f'oc apply -f {self.operator}', shell=True, check=True, cwd=self.dir)

    def cleanup(self):
        """
        Delete the ripsaw resources, delete the project and switch back to
        the default rook cluster project
        """
        run(f'oc delete -f {self.crd}', shell=True, cwd=self.dir)
        run(f'oc delete -f {self.operator}', shell=True, cwd=self.dir)
        run('oc delete -f deploy', shell=True, cwd=self.dir)
        run_cmd(f'oc delete project {self.namespace}')
        self.ns_obj.wait_for_delete(resource_name=self.namespace)
        # Reset namespace to default
        switch_to_default_rook_cluster_project()
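# Illustrative usage sketch for RipSaw (mirrors the Example Usage in the class
# docstring; the CRD path and benchmark file name are example values, not
# fixed by the code):
#
#   ripsaw = RipSaw()
#   ripsaw.apply_crd(crd='resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml')
#   run_cmd('oc apply -f my_custom_bench.yaml')
#   ...                                        # wait for the benchmark to finish
#   ripsaw.cleanup()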
class AMQ(object): """ Workload operation using AMQ """ def __init__(self, **kwargs): """ Initializer function Args: kwargs (dict): Following kwargs are valid namespace: namespace for the operator repo: AMQ repo where all necessary yaml file are there - a github link branch: branch to use from the repo """ self.args = kwargs self.repo = self.args.get('repo', constants.KAFKA_OPERATOR) self.branch = self.args.get('branch', 'master') self.ocp = OCP() self.ns_obj = OCP(kind='namespace') self.pod_obj = OCP(kind='pod') self.kafka_obj = OCP(kind='Kafka') self.kafka_connect_obj = OCP(kind="KafkaConnect") self.kafka_bridge_obj = OCP(kind="KafkaBridge") self.kafka_topic_obj = OCP(kind="KafkaTopic") self.kafka_user_obj = OCP(kind="KafkaUser") self.amq_is_setup = False self.messaging = False self._clone_amq() def _clone_amq(self): """ clone the amq repo """ self.dir = tempfile.mkdtemp(prefix='amq_') try: log.info(f'cloning amq in {self.dir}') git_clone_cmd = f'git clone -b {self.branch} {self.repo} ' run(git_clone_cmd, shell=True, cwd=self.dir, check=True) self.amq_dir = "strimzi-kafka-operator/install/cluster-operator/" self.amq_kafka_pers_yaml = "strimzi-kafka-operator/examples/kafka/kafka-persistent.yaml" self.amq_kafka_connect_yaml = "strimzi-kafka-operator/examples/connect/kafka-connect.yaml" self.amq_kafka_bridge_yaml = "strimzi-kafka-operator/examples/bridge/kafka-bridge.yaml" self.kafka_topic_yaml = "strimzi-kafka-operator/examples/topic/kafka-topic.yaml" self.kafka_user_yaml = "strimzi-kafka-operator/examples/user/kafka-user.yaml" self.hello_world_producer_yaml = constants.HELLO_WORLD_PRODUCER_YAML self.hello_world_consumer_yaml = constants.HELLO_WORLD_CONSUMER_YAML except (CommandFailed, CalledProcessError) as cf: log.error('Error during cloning of amq repository') raise cf def create_namespace(self, namespace): """ create namespace for amq Args: namespace (str): Namespace for amq pods """ self.ocp.new_project(namespace) def setup_amq_cluster_operator(self, namespace=constants.AMQ_NAMESPACE): """ Function to setup amq-cluster_operator, the file is pulling from github it will make sure cluster-operator pod is running Args: namespace (str): Namespace for AMQ pods """ # Namespace for amq try: self.create_namespace(namespace) except CommandFailed as ef: if f'project.project.openshift.io "{namespace}" already exists' not in str( ef): raise ef # Create strimzi-cluster-operator pod run( f"for i in `(ls strimzi-kafka-operator/install/cluster-operator/)`;" f"do sed 's/{namespace}/myproject/g' strimzi-kafka-operator/install/cluster-operator/$i;done", shell=True, check=True, cwd=self.dir) run(f'oc apply -f {self.amq_dir} -n {namespace}', shell=True, check=True, cwd=self.dir) time.sleep(10) # Check strimzi-cluster-operator pod created if self.is_amq_pod_running(pod_pattern="cluster-operator", expected_pods=1): log.info("strimzi-cluster-operator pod is in running state") else: raise ResourceWrongStatusException( "strimzi-cluster-operator pod is not getting to running state") def is_amq_pod_running(self, pod_pattern, expected_pods, namespace=constants.AMQ_NAMESPACE): """ The function checks if provided pod_pattern finds a pod and if the status is running or not Args: pod_pattern (str): the pattern for pod expected_pods (int): Number of pods namespace (str): Namespace for amq pods Returns: bool: status of pod: True if found pod is running """ _rc = True for pod in TimeoutSampler(300, 10, get_pod_name_by_pattern, pod_pattern, namespace): try: if pod is not None and len(pod) == expected_pods: amq_pod = pod 
break except IndexError as ie: log.error(" pod not ready yet") raise ie # checking pod status for pod in amq_pod: if (self.pod_obj.wait_for_resource( condition='Running', resource_name=pod, timeout=1600, sleep=30, )): log.info(f"{pod} pod is up and running") else: _rc = False log.error(f"{pod} pod is not running") return _rc def setup_amq_kafka_persistent(self, sc_name, size=100, replicas=3): """ Function to setup amq-kafka-persistent, the file is pulling from github it will make kind: Kafka and will make sure the status is running Args: sc_name (str): Name of sc size (int): Size of the storage in Gi replicas (int): Number of kafka and zookeeper pods to be created return : kafka_persistent """ try: kafka_persistent = templating.load_yaml( os.path.join(self.dir, self.amq_kafka_pers_yaml)) kafka_persistent['spec']['kafka']['replicas'] = replicas kafka_persistent['spec']['kafka']['storage']['volumes'][0][ 'class'] = sc_name kafka_persistent['spec']['kafka']['storage']['volumes'][0][ 'size'] = f"{size}Gi" kafka_persistent['spec']['zookeeper']['replicas'] = replicas kafka_persistent['spec']['zookeeper']['storage']['class'] = sc_name kafka_persistent['spec']['zookeeper']['storage'][ 'size'] = f"{size}Gi" self.kafka_persistent = OCS(**kafka_persistent) self.kafka_persistent.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during setup of AMQ Kafka-persistent') raise cf time.sleep(40) if self.is_amq_pod_running( pod_pattern="my-cluster-zookeeper", expected_pods=replicas) and self.is_amq_pod_running( pod_pattern="my-cluster-kafka", expected_pods=replicas): return self.kafka_persistent else: raise ResourceWrongStatusException( "my-cluster-kafka and my-cluster-zookeeper " "Pod is not getting to running state") def setup_amq_kafka_connect(self): """ The function is to setup amq-kafka-connect, the yaml file is pulling from github it will make kind: KafkaConnect and will make sure the status is running Returns: kafka_connect object """ try: kafka_connect = templating.load_yaml( os.path.join(self.dir, self.amq_kafka_connect_yaml)) self.kafka_connect = OCS(**kafka_connect) self.kafka_connect.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during setup of AMQ KafkaConnect') raise cf if self.is_amq_pod_running(pod_pattern="my-connect-cluster-connect", expected_pods=1): return self.kafka_connect else: raise ResourceWrongStatusException( "my-connect-cluster-connect pod is not getting to running state" ) def setup_amq_kafka_bridge(self): """ Function to setup amq-kafka, the file file is pulling from github it will make kind: KafkaBridge and will make sure the pod status is running Return: kafka_bridge object """ try: kafka_bridge = templating.load_yaml( os.path.join(self.dir, self.amq_kafka_bridge_yaml)) self.kafka_bridge = OCS(**kafka_bridge) self.kafka_bridge.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during setup of AMQ KafkaConnect') raise cf # Making sure the kafka_bridge is running if self.is_amq_pod_running(pod_pattern="my-bridge-bridge", expected_pods=1): return self.kafka_bridge else: raise ResourceWrongStatusException( "kafka_bridge_pod pod is not getting to running state") def create_kafka_topic(self, name='my-topic', partitions=1, replicas=1): """ Creates kafka topic Args: name (str): Name of the kafka topic partitions (int): Number of partitions replicas (int): Number of replicas Return: kafka_topic object """ try: kafka_topic = templating.load_yaml( os.path.join(self.dir, self.kafka_topic_yaml)) 
kafka_topic["metadata"]["name"] = name kafka_topic["spec"]["partitions"] = partitions kafka_topic["spec"]["replicas"] = replicas self.kafka_topic = OCS(**kafka_topic) self.kafka_topic.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during creating of Kafka topic') raise cf # Making sure kafka topic created if self.kafka_topic_obj.get(resource_name=name): return self.kafka_topic else: raise ResourceWrongStatusException("kafka topic is not created") def create_kafka_user(self, name="my-user"): """ Creates kafka user Args: name (str): Name of the kafka user Return: kafka_user object """ try: kafka_user = templating.load_yaml( os.path.join(self.dir, self.kafka_user_yaml)) kafka_user["metadata"]["name"] = name self.kafka_user = OCS(**kafka_user) self.kafka_user.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during creating of Kafka user') raise cf # Making sure kafka user created if self.kafka_user_obj.get(resource_name=name): return self.kafka_user else: raise ResourceWrongStatusException("kafka user is not created") def create_producer_pod(self, num_of_pods=1, value='10000'): """ Creates producer pods Args: num_of_pods (int): Number of producer pods to be created value (str): Number of the messages to be sent Returns: producer pod object """ try: producer_pod = templating.load_yaml( constants.HELLO_WORLD_PRODUCER_YAML) producer_pod["spec"]["replicas"] = num_of_pods producer_pod["spec"]["template"]["spec"]["containers"][0]["env"][ 4]["value"] = value self.producer_pod = OCS(**producer_pod) self.producer_pod.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during creation of producer pod') raise cf # Making sure the producer pod is running if self.is_amq_pod_running(pod_pattern="hello-world-producer", expected_pods=num_of_pods): return self.producer_pod else: raise ResourceWrongStatusException( "producer pod is not getting to running state") def create_consumer_pod(self, num_of_pods=1, value='10000'): """ Creates producer pods Args: num_of_pods (int): Number of consumer pods to be created value (str): Number of messages to be received Returns: consumer pod object """ try: consumer_pod = templating.load_yaml( constants.HELLO_WORLD_CONSUMER_YAML) consumer_pod["spec"]["replicas"] = num_of_pods consumer_pod["spec"]["template"]["spec"]["containers"][0]["env"][ 4]["value"] = value self.consumer_pod = OCS(**consumer_pod) self.consumer_pod.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during creation of consumer pod') raise cf # Making sure the producer pod is running if self.is_amq_pod_running(pod_pattern="hello-world-consumer", expected_pods=num_of_pods): return self.consumer_pod else: raise ResourceWrongStatusException( "consumer pod is not getting to running state") def validate_msg(self, pod, namespace=constants.AMQ_NAMESPACE, value='10000', since_time=1800): """ Validate if messages are sent or received Args: pod (str): Name of the pod namespace (str): Namespace of the pod value (str): Number of messages are sent since_time (int): Number of seconds to required to sent the msg Returns: bool : True if all messages are sent/received """ cmd = f"oc logs -n {namespace} {pod} --since={since_time}s" msg = run_cmd(cmd) if msg.find(f"Hello world - {int(value) - 1} ") is -1: return False else: return True def validate_messages_are_produced(self, namespace=constants.AMQ_NAMESPACE, value='10000', since_time=1800): """ Validates if all messages are sent in producer pod Args: namespace (str): 
Namespace of the pod value (str): Number of messages are sent since_time (int): Number of seconds to required to sent the msg Raises exception on failures """ # ToDo: Support multiple topics and users producer_pod_objs = [ get_pod_obj(pod) for pod in get_pod_name_by_pattern( 'hello-world-produce', namespace) ] for pod in producer_pod_objs: for msg in TimeoutSampler(900, 30, self.validate_msg, pod.name, namespace, value, since_time): if msg: break log.error("Few messages are not sent") raise Exception("All messages are not sent from the producer pod") def validate_messages_are_consumed(self, namespace=constants.AMQ_NAMESPACE, value='10000', since_time=1800): """ Validates if all messages are received in consumer pod Args: namespace (str): Namespace of the pod value (str): Number of messages are recieved since_time (int): Number of seconds to required to receive the msg Raises exception on failures """ # ToDo: Support multiple topics and users consumer_pod_objs = [ get_pod_obj(pod) for pod in get_pod_name_by_pattern( 'hello-world-consumer', namespace) ] for pod in consumer_pod_objs: for msg in TimeoutSampler(900, 30, self.validate_msg, pod.name, namespace, value, since_time): if msg: log.info( "Consumer pod received all messages sent by producer") break log.error("Few messages are not received") raise Exception("Consumer pod received all messages sent by producer") def run_in_bg(self, namespace=constants.AMQ_NAMESPACE, value='10000', since_time=1800): """ Validate messages are produced and consumed in bg Args: namespace (str): Namespace of the pod value (str): Number of messages to be sent and received since_time (int): Number of seconds to required to sent and receive msg """ # Todo: Check for each messages sent and received log.info("Running open messages on pod in bg") threads = [] thread1 = Thread(target=self.validate_messages_are_produced, args=(namespace, value, since_time)) thread1.start() time.sleep(10) threads.append(thread1) thread2 = Thread(target=self.validate_messages_are_consumed, args=(namespace, value, since_time)) thread2.start() time.sleep(10) threads.append(thread2) return threads # ToDo: Install helm and get kafka metrics def create_messaging_on_amq(self, topic_name='my-topic', user_name="my-user", partitions=1, replicas=1, num_of_producer_pods=1, num_of_consumer_pods=1, value='10000'): """ Creates workload using Open Messaging tool on amq cluster Args: topic_name (str): Name of the topic to be created user_name (str): Name of the user to be created partitions (int): Number of partitions of topic replicas (int): Number of replicas of topic num_of_producer_pods (int): Number of producer pods to be created num_of_consumer_pods (int): Number of consumer pods to be created value (str): Number of messages to be sent and received """ self.create_kafka_topic(topic_name, partitions, replicas) self.create_kafka_user(user_name) self.create_producer_pod(num_of_producer_pods, value) self.create_consumer_pod(num_of_consumer_pods, value) self.messaging = True def setup_amq_cluster(self, sc_name, namespace=constants.AMQ_NAMESPACE, size=100, replicas=3): """ Creates amq cluster with persistent storage. 
        Args:
            sc_name (str): Name of the storage class
            namespace (str): Namespace for the amq cluster
            size (int): Size of the storage in Gi
            replicas (int): Number of kafka and zookeeper pods to be created

        """
        self.setup_amq_cluster_operator(namespace)
        self.setup_amq_kafka_persistent(sc_name, size, replicas)
        self.setup_amq_kafka_connect()
        self.setup_amq_kafka_bridge()
        self.amq_is_setup = True
        return self

    def cleanup(self, namespace=constants.AMQ_NAMESPACE):
        """
        Clean up function: deletes the messaging pods, the Kafka resources
        (user, topic, persistent cluster, connect and bridge), the cluster
        operator and, finally, the namespace created for amq

        Args:
            namespace (str): Created namespace for amq
        """
        if self.amq_is_setup:
            if self.messaging:
                self.consumer_pod.delete()
                self.producer_pod.delete()
                self.kafka_user.delete()
                self.kafka_topic.delete()
            self.kafka_persistent.delete()
            self.kafka_connect.delete()
            self.kafka_bridge.delete()
            # The operator yaml files live in the cloned repo, so the delete
            # must run from that directory; use subprocess.run here because
            # run_cmd does not accept the shell/check/cwd keyword arguments
            run(f'oc delete -f {self.amq_dir}', shell=True, check=True, cwd=self.dir)
        run_cmd(f'oc delete project {namespace}')
        # Reset namespace to default
        switch_to_default_rook_cluster_project()
        self.ns_obj.wait_for_delete(resource_name=namespace)
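# Illustrative end-to-end sketch for the AMQ workload (a sketch, assuming a
# valid RBD storage class exists; the storage class name below is a
# placeholder, not a value from the code):
#
#   amq = AMQ()
#   amq.setup_amq_cluster(sc_name="<rbd-storage-class>")
#   amq.create_messaging_on_amq(topic_name="my-topic", user_name="my-user")
#   threads = amq.run_in_bg(value="10000")
#   for thread in threads:
#       thread.join()
#   amq.cleanup()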
class Cosbench(object): """ Cosbench S3 benchmark tool """ def __init__(self): """ Initializer function """ self.ns_obj = OCP(kind="namespace") self.namespace = constants.COSBENCH_PROJECT self.configmap_obj = OCP(namespace=self.namespace, kind=constants.CONFIGMAP) self.ocp_obj = OCP(namespace=self.namespace) self.cosbench_config = None self.cosbench_pod = None self.cosbench_dir = mkdtemp(prefix="cosbench-tool-") self.xml_file = "" self.workload_id = "" self.init_container = 1 self.range_selector = "r" self.init_object = 1 mcg_obj = MCG() self.access_key_id = mcg_obj.access_key_id self.access_key = mcg_obj.access_key self.endpoint = ( "http://" + mcg_obj.s3_internal_endpoint.split("/")[2].split(":")[0]) def setup_cosbench(self): """ Setups Cosbench namespace, configmap and pod """ # Create cosbench project self.ns_obj.new_project(project_name=self.namespace) # Create configmap config_data = templating.load_yaml(file=constants.COSBENCH_CONFIGMAP) cosbench_configmap_name = create_unique_resource_name( constants.COSBENCH, "configmap") config_data["metadata"]["name"] = cosbench_configmap_name config_data["metadata"]["namespace"] = self.namespace self.cosbench_config = OCS(**config_data) logger.info( f"Creating Cosbench configmap: {self.cosbench_config.name}") self.cosbench_config.create() self.configmap_obj.wait_for_resource( resource_name=self.cosbench_config.name, column="DATA", condition="4") # Create Cosbench pod cosbench_pod_data = templating.load_yaml(file=constants.COSBENCH_POD) cosbench_pod_data["spec"]["containers"][0]["envFrom"][0][ "configMapRef"]["name"] = self.cosbench_config.name cosbench_pod_name = create_unique_resource_name( constants.COSBENCH, "pod") cosbench_pod_data["metadata"]["name"] = cosbench_pod_name cosbench_pod_data["metadata"]["namespace"] = self.namespace self.cosbench_pod = OCS(**cosbench_pod_data) logger.info(f"Creating Cosbench pod: {self.cosbench_pod.name}") self.cosbench_pod.create() helpers.wait_for_resource_state(resource=self.cosbench_pod, state=constants.STATUS_RUNNING, timeout=300) def _apply_mcg_auth(self, xml_root): """ Applies MCG credentials Args: xml_root (Element): Root element of workload xml """ xml_root[0].set( "config", f"accesskey={self.access_key_id};secretkey={self.access_key};" f"endpoint={self.endpoint};path_style_access=true", ) def run_init_workload( self, prefix, containers, objects, start_container=None, start_object=None, size=64, size_unit="KB", sleep=15, timeout=300, validate=True, ): """ Creates specific containers and objects in bulk Args: prefix (str): Prefix of bucket name. containers (int): Number of containers/buckets to be created. objects (int): Number of objects to be created on each bucket. start_container (int): Start of containers. Default: 1. start_object (int): Start of objects. Default: 1. size (int): Size of each objects. size_unit (str): Object size unit (B/KB/MB/GB) sleep (int): Sleep in seconds. timeout (int): Timeout in seconds. validate (bool): Validates whether init and prepare is completed. 
Returns: Tuple[str, str]: Workload xml and its name """ init_template = """ <workload name="Fill" description="Init and prepare operation"> <storage type="s3" config="" /> <workflow> <workstage name="init-containers"> <work type="init" workers="1" config="" /> </workstage> <workstage name="prepare-objects"> <work type="prepare" workers="16" config="" /> </workstage> </workflow> </workload> """ xml_root, xml_tree = self._create_element_tree(template=init_template) workload_name = xml_root.get("name") self._apply_mcg_auth(xml_root) self.init_container = (start_container if start_container else self.init_container) self.init_object = start_object if start_object else self.init_object init_container_config = self.generate_container_stage_config( self.range_selector, self.init_container, containers, ) init_config = self.generate_stage_config( self.range_selector, self.init_container, containers, self.init_object, objects, ) for stage in xml_root.iter("work"): if stage.get("type") == "init": stage.set("config", f"cprefix={prefix};{init_container_config}") elif stage.get("type") == "prepare": stage.set( "config", f"cprefix={prefix};{init_config};sizes=c({str(size)}){size_unit}", ) self._create_tmp_xml(xml_tree=xml_tree, xml_file_prefix=workload_name) self.submit_workload(workload_path=self.xml_file) self.wait_for_workload(workload_id=self.workload_id, sleep=sleep, timeout=timeout) if validate: self.validate_workload(workload_id=self.workload_id, workload_name=workload_name) else: return self.workload_id, workload_name def run_cleanup_workload( self, prefix, containers, objects, start_container=None, start_object=None, sleep=15, timeout=300, validate=True, ): """ Deletes specific objects and containers in bulk. Args: prefix (str): Prefix of bucket name. containers (int): Number of containers/buckets to be created. objects (int): Number of objects to be created on each bucket. start_container (int): Start of containers. Default: 1. start_object (int): Start of objects. Default: 1. sleep (int): Sleep in seconds. timeout (int): Timeout in seconds. validate (bool): Validates whether cleanup and dispose is completed. 
Returns: Tuple[str, str]: Workload xml and its name """ cleanup_template = """ <workload name="Cleanup" description="Cleanup and Dispose"> <storage type="s3" config="" /> <workflow> <workstage name="cleanup-objects"> <work type="cleanup" workers="4" config="" /> </workstage> <workstage name="dispose-containers"> <work type="dispose" workers="1" config="" /> </workstage> </workflow> </workload> """ xml_root, xml_tree = self._create_element_tree( template=cleanup_template) workload_name = xml_root.get("name") self._apply_mcg_auth(xml_root) self.init_container = (start_container if start_container else self.init_container) self.init_object = start_object if start_object else self.init_object cleanuo_config = self.generate_stage_config( self.range_selector, self.init_container, containers, self.init_object, objects, ) for stage in xml_root.iter("work"): if stage.get("type") == "cleanup": stage.set( "config", f"cprefix={prefix};{cleanuo_config}", ) elif stage.get("type") == "dispose": stage.set("config", f"cprefix={prefix};{cleanuo_config}") self._create_tmp_xml(xml_tree=xml_tree, xml_file_prefix=workload_name) self.submit_workload(workload_path=self.xml_file) self.wait_for_workload(workload_id=self.workload_id, sleep=sleep, timeout=timeout) if validate: self.validate_workload(workload_id=self.workload_id, workload_name=workload_name) else: return self.workload_id, workload_name def run_main_workload( self, operation_type, prefix, containers, objects, workers=4, selector="s", start_container=None, start_object=None, size=64, size_unit="KB", sleep=15, timeout=300, extend_objects=None, validate=True, result=True, ): """ Creates and runs main Cosbench workload. Args: operation_type (dict): Cosbench operation and its ratio. Operation (str): Supported ops are read, write, list and delete. Ratio (int): Percentage of each operation. Should add up to 100. workers (int): Number of users to perform operations. containers (int): Number of containers/buckets to be created. objects (int): Number of objects to be created on each bucket. selector (str): The way object is accessed/selected. u=uniform, r=range, s=sequential. prefix (str): Prefix of bucket name. start_container (int): Start of containers. Default: 1. start_object (int): Start of objects. Default: 1. size (int): Size of each objects. size_unit (str): Object size unit (B/KB/MB/GB) sleep (int): Sleep in seconds timeout (int): Timeout in seconds validate (bool): Validates whether each stage is completed extend_objects (int): Extends the total number of objects to prevent overlap. Use only for Write and Delete operations. result (bool): Get performance results when running workload is completed. 
Returns: Tuple[str, str]: Workload xml and its name """ main_template = """ <workload name="workload_name" description="Main workload"> <storage type="s3" config="" /> <workflow> <workstage name="Main"> <work name="work_name" workers="4" division="object" runtime="60"> </work> </workstage> </workflow> </workload> """ xml_root, xml_tree = self._create_element_tree(template=main_template) workload_name = xml_root.get("name") self._apply_mcg_auth(xml_root) start_container = start_container if start_container else self.init_container start_object = start_object if start_object else self.init_object for stage in xml_root.iter("work"): stage.set("workers", f"{workers}") for operation, ratio in operation_type.items(): if operation == "write" or "delete": if extend_objects: start_object = objects + 1 stage_config = self.generate_stage_config( selector, start_container, containers, start_object, extend_objects, ) attributes = { "type": f"{operation}", "ratio": f"{ratio}", "config": f"cprefix={prefix};{stage_config};sizes=c({str(size)}){size_unit}", } ElementTree.SubElement(stage, "operation", attributes) else: stage_config = self.generate_stage_config( selector, start_container, containers, start_object, objects, ) attributes = { "type": f"{operation}", "ratio": f"{ratio}", "config": f"cprefix={prefix};{stage_config};sizes=c({str(size)}){size_unit}", } ElementTree.SubElement(stage, "operation", attributes) else: stage_config = self.generate_stage_config( selector, start_container, containers, start_object, objects, ) attributes = { "type": f"{operation}", "ratio": f"{ratio}", "config": f"cprefix={prefix};{stage_config}", } ElementTree.SubElement(stage, "operation", attributes) self._create_tmp_xml(xml_tree=xml_tree, xml_file_prefix=workload_name) self.submit_workload(workload_path=self.xml_file) self.wait_for_workload(workload_id=self.workload_id, sleep=sleep, timeout=timeout) if validate: self.validate_workload(workload_id=self.workload_id, workload_name=workload_name) else: return self.workload_id, workload_name if result: throughput, bandwidth = self.get_performance_result( workload_id=self.workload_id, workload_name=workload_name, size=size, ) return throughput, bandwidth else: return self.workload_id, workload_name @staticmethod def generate_stage_config(selector, start_container, end_container, start_objects, end_object): """ Generates config which is used in stage creation Args: selector (str): The way object is accessed/selected. u=uniform, r=range, s=sequential. start_container (int): Start of containers end_container (int): End of containers start_objects (int): Start of objects end_object (int): End of objects Returns: (str): Container and object configuration """ xml_config = ( f"containers={selector}({str(start_container)},{str(end_container)});" f"objects={selector}({str(start_objects)},{str(end_object)})") return xml_config @staticmethod def generate_container_stage_config(selector, start_container, end_container): """ Generates container config which creates buckets in bulk Args: selector (str): The way object is accessed/selected. u=uniform, r=range, s=sequential. 
start_container (int): Start of containers end_container (int): End of containers Returns: (str): Container and object configuration """ container_config = ( f"containers={selector}({str(start_container)},{str(end_container)});" ) return container_config def _create_tmp_xml(self, xml_tree, xml_file_prefix): """ Creates a xml file and writes the workload Args: xml_file_prefix (str): Prefix of xml file xml_tree (Element): Element tree """ self.xml_file = NamedTemporaryFile( dir=self.cosbench_dir, prefix=f"{xml_file_prefix}", suffix=".xml", delete=False, ).name logger.info(self.xml_file) xml_tree.write(self.xml_file) @staticmethod def _create_element_tree(template): """ Creates element tree and root element of xml Args: template (str): Template of Cosbench workload Returns: Tuple[Element, ElementTree]: Root element and element tree of xml """ xml_root = ElementTree.fromstring(text=template) xml_tree = ElementTree.ElementTree(element=xml_root) return xml_root, xml_tree def _copy_workload(self, workload_path): """ Copies workload xml to Cosbench pod Args: workload_path (str): Absolute path of xml to copy """ self.ocp_obj.exec_oc_cmd( command=f"cp {workload_path} {self.cosbench_pod.name}:/cos", out_yaml_format=False, timeout=180, ) def submit_workload(self, workload_path): """ Submits Cosbench xml to initiate workload Args: workload_path (str): Absolute path of xml to submit """ self._copy_workload(workload_path=workload_path) workload = os.path.split(workload_path)[1] self._cosbench_cli(workload) @retry(AttributeError, tries=15, delay=5, backoff=1) def _cosbench_cli(self, workload): """ Runs Cosbench cli to initiate workload Args: workload (str): Workload file """ submit_key = "Accepted with ID" cobench_pod_obj = get_pod_obj(name=self.cosbench_pod.name, namespace=self.namespace) submit = cobench_pod_obj.exec_cmd_on_pod( command=f"/cos/cli.sh submit /cos/{workload}", out_yaml_format=True, timeout=180, ) if submit_key in submit.keys(): self.workload_id = submit[submit_key] else: assert f"Failed to submit the workload, ID not found. stdout: {submit}" def wait_for_workload(self, workload_id, sleep=1, timeout=60): """ Waits for the cosbench workload to complete Args: workload_id (str): ID of cosbench workload sleep: sleep in seconds timeout: timeout in seconds to check if mirroring Returns: bool: Whether cosbench workload processed successfully """ logger.info(f"Waiting for workload {workload_id} to be processed") pattern = f"sucessfully processed workload {workload_id}" try: for ret in TimeoutSampler( timeout=timeout, sleep=sleep, func=get_pod_logs, pod_name=self.cosbench_pod.name, namespace=self.namespace, ): if re.search(pattern=pattern, string=ret): break logger.info( f"Verified: Workload {workload_id} processed successfully") return True except TimeoutExpiredError: logger.error( f"Workload {workload_id} did not complete. Dumping cosbench pod log" ) # Log cosbench pod for debugging purpose cosbench_log = get_pod_logs(pod_name=self.cosbench_pod.name, namespace=self.namespace) logger.debug(cosbench_log) return False def validate_workload(self, workload_id, workload_name): """ Validates each stage of cosbench workload Args: workload_id (str): ID of cosbench workload workload_name (str): Name of the workload Raises: UnexpectedBehaviour: When workload csv is incorrect/malformed. 
""" workload_csv = self.get_result_csv(workload_id=workload_id, workload_name=workload_name) with open(workload_csv, "r") as file: reader = csv.reader(file) header = next(reader) if header is not None: # Iterate over each row after the header logger.info( f"Verifying whether each stage of workload {workload_id} completed" ) for row in reader: if row[16] == "completed": logger.info(f"Stage {row[0]} completed successfully") else: assert ( f"Failed: Stage {row[0]} did not complete. Status {row[16]}" ) else: raise UnexpectedBehaviour( f"Workload csv is incorrect/malformed. Dumping csv {reader}" ) def get_result_csv(self, workload_id, workload_name): """ Gets cosbench workload result csv Args: workload_id (str): ID of cosbench workload workload_name (str): Name of the workload Returns: str: Absolute path of the result csv """ archive_file = f"{workload_id}-{workload_name}" cmd = ( f"cp {self.cosbench_pod.name}:/cos/archive/{archive_file}/{archive_file}.csv " f"{self.cosbench_dir}/{archive_file}.csv ") self.ocp_obj.exec_oc_cmd( command=cmd, out_yaml_format=False, timeout=300, ) return f"{self.cosbench_dir}/{archive_file}.csv" def cleanup(self): """ Cosbench cleanup """ switch_to_project(constants.COSBENCH_PROJECT) logger.info("Deleting Cosbench pod, configmap and namespace") self.cosbench_pod.delete() self.cosbench_config.delete() self.ns_obj.delete_project(self.namespace) self.ns_obj.wait_for_delete(resource_name=self.namespace, timeout=90) def get_performance_result(self, workload_name, workload_id, size): workload_file = self.get_result_csv(workload_id=workload_id, workload_name=workload_name) throughput_data = {} bandwidth_data = {} with open(workload_file, "r") as file: reader = csv.reader(file) header = next(reader) if header is not None: for row in reader: throughput_data[row[1]] = row[13] bandwidth_data[row[1]] = row[14] else: raise UnexpectedBehaviour( f"Workload csv is incorrect/malformed. Dumping csv {reader}" ) # Store throughput data on csv file log_path = f"{self.cosbench_dir}" with open(f"{log_path}/{workload_name}-{size}-throughput.csv", "a") as fd: csv_obj = csv.writer(fd) for k, v in throughput_data.items(): csv_obj.writerow([k, v]) logger.info( f"Throughput data present in {log_path}/{workload_name}-{size}-throughput.csv" ) # Store bandwidth data on csv file with open(f"{log_path}/{workload_name}-{size}-bandwidth.csv", "a") as fd: csv_obj = csv.writer(fd) for k, v in bandwidth_data.items(): csv_obj.writerow([k, v]) logger.info( f"Bandwidth data present in {log_path}/{workload_name}-{size}-bandwidth.csv" ) return throughput_data, bandwidth_data def cosbench_full(self): """ Run full Cosbench workload """ bucket_prefix = "bucket-" buckets = 10 objects = 1000 # Operations to perform and its ratio(%) operations = {"read": 50, "write": 50} # Deployment of cosbench self.setup_cosbench() # Create initial containers and objects self.run_init_workload(prefix=bucket_prefix, containers=buckets, objects=objects, validate=True) # Start measuring time start_time = datetime.now() # Run main workload self.run_main_workload( operation_type=operations, prefix=bucket_prefix, containers=buckets, objects=objects, validate=True, timeout=10800, ) # Calculate the total run time of Cosbench workload end_time = datetime.now() diff_time = end_time - start_time logger.info(f"Cosbench workload completed after {diff_time}") # Dispose containers and objects self.run_cleanup_workload(prefix=bucket_prefix, containers=buckets, objects=objects, validate=True)
def test_del_mon_svc( self, multi_pvc_factory, validate_all_mon_svc_are_up_at_teardown ): """ Test to verify same mon comes up and running after deleting mon services manually and joins the quorum 1. Delete the mon services 2. Restart the rook operator 3. Make sure all mon pods are running, and same service or endpoints are running 4. Make sure ceph health Ok and storage pods are running 5. Create PVC, should succeeded. """ self.sanity_helpers = Sanity() # Get all mon services mon_svc_before = get_services_by_label( label=constants.MON_APP_LABEL, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, ) # Get all mon pods mon_pods = get_mon_pods() # Delete the mon services one by one svc_obj = OCP( kind=constants.SERVICE, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE ) mon_svc_ip_before = [] for svc in mon_svc_before: svc_name = svc["metadata"]["name"] mon_svc_ip_before.append(svc["spec"]["clusterIP"]) log.info(f"Delete mon service {svc_name}") svc_obj.delete(resource_name=svc_name) # Verify mon services deleted svc_obj.wait_for_delete(resource_name=svc_name) # Restart the rook-operator pod operator_pod_obj = get_operator_pods() delete_pods(pod_objs=operator_pod_obj) POD_OBJ.wait_for_resource( condition=constants.STATUS_RUNNING, selector=constants.OPERATOR_LABEL ) # Verify same mon services are created again for svc in mon_svc_before: svc_name = svc["metadata"]["name"] svc_obj.check_resource_existence( should_exist=True, timeout=300, resource_name=svc_name ) log.info("Same old mon services are recreated") # Validate all mons are running log.info("Validate all mons are up and running") POD_OBJ.wait_for_resource( condition=constants.STATUS_RUNNING, selector=constants.MON_APP_LABEL, resource_count=len(mon_pods), timeout=600, sleep=3, ) # Validate same mon services are running log.info("Validate same mon services are running") mon_svc_after = get_services_by_label( label=constants.MON_APP_LABEL, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, ) mon_svc_ip_after = [svc["spec"]["clusterIP"] for svc in mon_svc_after] assert len(set(mon_svc_ip_after) ^ set(mon_svc_ip_before)) == 0, ( "Different mon services are running. " f"Before mon services list: {mon_svc_ip_before}, " f"After mon services list: {mon_svc_ip_after}" ) log.info("Same old mon services are running and all mons are in running state") # Verify everything running fine log.info("Verifying All resources are Running and matches expected result") self.sanity_helpers.health_check(tries=120) # Validate all storage pods are running wait_for_storage_pods() # Create and delete resources self.sanity_helpers.create_pvc_delete(multi_pvc_factory=multi_pvc_factory)
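# The "same mon services" check above compares the clusterIP sets before and
# after the services are recreated; a minimal standalone sketch with
# hypothetical addresses:
#
#   before = ["172.30.1.10", "172.30.1.11", "172.30.1.12"]
#   after = ["172.30.1.12", "172.30.1.10", "172.30.1.11"]
#   assert len(set(after) ^ set(before)) == 0   # empty symmetric difference -> same IPs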
class BenchmarkOperator(object): """ Workload operation using Benchmark-Operator """ def __init__(self, **kwargs): """ Initializer function. Initialize object variables, clone the benchmark operator repo. and label the worker nodes. Args: kwargs (dict): Following kwargs are valid repo: benchmark-operator repo to used - a github link branch: branch to use from the repo Example Usage: r1 = BenchmarkOperator() r1.deploy() # use oc apply to apply custom modified bench my_custom_bench = my_custom_bench.yaml run_cmd('oc apply -f my_custom_bench') """ log.info("Initialize the benchmark-operator object") self.args = kwargs self.repo = self.args.get("repo", BMO_REPO) self.branch = self.args.get("branch", "master") # the namespace is a constant for the benchmark-operator self.namespace = BMO_NAME self.pgsql_is_setup = False self.ocp = OCP() self.ns_obj = OCP(kind="namespace") self.pod_obj = OCP(namespace=BMO_NAME, kind="pod") # list of worker nodes to label self.worker_nodes = get_worker_nodes() self._clone_operator() self.dir += f"/{BMO_NAME}" # to use the cache dropping pod, worker nodes need to be labeled. log.info("Labeling the worker nodes for cache-dropping enable.") try: helpers.label_worker_node( self.worker_nodes, label_key=BMO_LABEL, label_value="yes" ) except CommandFailed: # this is probably because of the nodes are already labeled, so, # checking if nodes are labeled and continue anyway. result = self.pod_obj.exec_oc_cmd(f"get node -l {BMO_LABEL}") found = [node for node in self.worker_nodes if re.search(node, result)] if len(found) == len(self.worker_nodes): log.info("All worker nodes are labeled") else: log.warning("Labeling nodes failed, Not all workers node are labeled !") def _clone_operator(self): """ clone the benchmark-operator repo into temp directory """ self.dir = tempfile.mkdtemp(prefix=f"{BMO_NAME}_") try: log.info(f"Cloning {BMO_NAME} in {self.dir}") git_clone_cmd = f"git clone -b {self.branch} {self.repo} --depth 1" run(git_clone_cmd, shell=True, cwd=self.dir, check=True) except (CommandFailed, CalledProcessError) as cf: log.error(f"Error during cloning of {BMO_NAME} repository") raise cf def _is_ready(self): """ Check the status of the benchmark-operator to verify it is Ready Returns: bool : True if all containers ar up, other false. """ OK = 1 result = self.pod_obj.exec_oc_cmd(f"get pod -n {BMO_NAME} -o json") for cnt in result.get("items")[0].get("status").get("containerStatuses"): if not cnt.get("ready"): OK = 0 if not OK: log.warning("Benchmark Operator is not ready") return False else: return True def deploy(self): """ Deploy the benchmark-operator """ log.info("Deploy the benchmark-operator project") try: run("make deploy", shell=True, check=True, cwd=self.dir) except Exception as ex: log.error(f"Failed to deploy benchmark operator : {ex}") log.info("Wait for the benchmark-operator deployment be available") try: cmd = f'wait --for=condition=available "{BMO_DEPLOYMENT}" -n {BMO_NAME} --timeout=300s' self.pod_obj.exec_oc_cmd(cmd) # At this point the benchmark operator pod is ready, but we need to # verifying that all containers in the pod are ready (up to 30 sec.) 
sample = TimeoutSampler(timeout=30, sleep=3, func=self._is_ready) if not sample.wait_for_func_status(True): raise Exception("Not all the containers are ready") except Exception as ex: log.error(f"Failed to wait for benchmark operator : {ex}") log.info("the benchmark Operator is ready") def cleanup(self): """ Clean up the cluster from the benchmark operator project """ # Reset namespace to default switch_to_default_rook_cluster_project() log.info("Delete the benchmark-operator project") run("make undeploy", shell=True, check=True, cwd=self.dir) # Wait until the benchmark-operator project deleted self.ns_obj.wait_for_delete(resource_name=self.namespace, timeout=180) # remove from workers the label used for cache dropping log.info("Remove labels from worker nodes.") helpers.remove_label_from_worker_node(self.worker_nodes, label_key=BMO_LABEL) # wait another 10 sec. after cleanup done. time.sleep(10) def get_uuid(self, benchmark): """ Getting the UUID of the test. when benchmark-operator used for running a benchmark tests, each run get its own UUID, so the results in the elastic-search server can be sorted. Args: benchmark (str): the name of the main pod in the test Return: str: the UUID of the test or '' if UUID not found in the benchmark pod """ for output in TimeoutSampler( timeout=30, sleep=5, func=self.pod_obj.exec_oc_cmd, command=f"exec {benchmark} -- env", ): if output != "": for line in output.split(): if re.match("uuid=", line): uuid = line.split("=")[1] log.info(f"The UUID of the test is : {uuid}") return uuid return ""
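# Illustrative usage sketch for BenchmarkOperator (mirrors the Example Usage
# in the class docstring; the custom benchmark file and benchmark pod name are
# hypothetical placeholders):
#
#   operator = BenchmarkOperator()
#   operator.deploy()
#   run_cmd('oc apply -f my_custom_bench.yaml')
#   uuid = operator.get_uuid(benchmark="<benchmark-client-pod>")
#   operator.cleanup()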
class CouchBase(PillowFight): """ CouchBase workload operation """ def __init__(self, **kwargs): """ Initializer function """ super().__init__(**kwargs) self.args = kwargs self.pod_obj = OCP(kind="pod") self.ns_obj = OCP(kind="namespace") self.couchbase_pod = OCP(kind="pod") self.create_namespace(namespace=constants.COUCHBASE_OPERATOR) self.cb_create_cb_secret = False self.cb_create_cb_cluster = False self.cb_create_bucket = False def create_namespace(self, namespace): """ create namespace for couchbase Args: namespace (str): Namespace for deploying couchbase pods """ try: self.ns_obj.new_project(namespace) except CommandFailed as ef: log.info("Already present") if f'project.project.openshift.io "{namespace}" already exists' not in str( ef): raise ef def couchbase_operatorgroup(self): """ Creates an operator group for Couchbase """ operatorgroup_yaml = templating.load_yaml( constants.COUCHBASE_OPERATOR_GROUP_YAML) self.operatorgroup_yaml = OCS(**operatorgroup_yaml) self.operatorgroup_yaml.create() def couchbase_subscription(self): """ Creates subscription for Couchbase operator """ # Create an operator group for Couchbase log.info("Creating operator group for couchbase") self.couchbase_operatorgroup() subscription_yaml = templating.load_yaml( constants.COUCHBASE_OPERATOR_SUBSCRIPTION_YAML) self.subscription_yaml = OCS(**subscription_yaml) self.subscription_yaml.create() # Wait for the CSV to reach succeeded state cb_csv = self.get_couchbase_csv() cb_csv_obj = CSV(resource_name=cb_csv, namespace=constants.COUCHBASE_OPERATOR) cb_csv_obj.wait_for_phase("Succeeded", timeout=720) def get_couchbase_csv(self): """ " Get the Couchbase CSV object Returns: CSV: Couchbase CSV object Raises: CSVNotFound: In case no CSV found. """ cb_package_manifest = PackageManifest( resource_name="couchbase-enterprise-certified") cb_enter_csv = cb_package_manifest.get_current_csv( channel="stable", csv_pattern=constants.COUCHBASE_CSV_PREFIX) return cb_enter_csv def create_cb_secrets(self): """ " Create secrets for running Couchbase workers """ cb_secrets = templating.load_yaml(constants.COUCHBASE_WORKER_SECRET) self.cb_secrets = OCS(**cb_secrets) self.cb_secrets.create() log.info("Successfully created secrets for Couchbase") self.cb_create_cb_secret = True def create_cb_cluster(self, replicas=1, sc_name=None): """ Deploy a Couchbase server using Couchbase operator Once the couchbase operator is running, we need to wait for the worker pods to be up. Once the Couchbase worker pods are up, pillowfight task is started. After the pillowfight task has finished, the log is collected and analyzed. Raises: Exception: If pillowfight results indicate that a minimum performance level is not reached (1 second response time, less than 1000 ops per second) """ log.info("Creating Couchbase worker pods...") cb_example = templating.load_yaml(constants.COUCHBASE_WORKER_EXAMPLE) if storagecluster_independent_check(): cb_example["spec"]["volumeClaimTemplates"][0]["spec"][ "storageClassName"] = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD cb_example["spec"]["servers"][0]["size"] = replicas if sc_name: cb_example["spec"]["volumeClaimTemplates"][0]["spec"][ "storageClassName"] = sc_name self.cb_example = OCS(**cb_example) self.cb_example.create() self.cb_create_cb_cluster = True # Wait for the Couchbase workers to be running. 
log.info("Waiting for the Couchbase pods to be Running") self.pod_obj.wait_for_resource( condition="Running", selector="app=couchbase", resource_count=replicas, timeout=900, ) log.info( f"Expected number: {replicas} of couchbase workers reached running state" ) def create_data_buckets(self): """ Create data buckets """ cb_bucket = templating.load_yaml(constants.COUCHBASE_DATA_BUCKET) self.cb_bucket = OCS(**cb_bucket) self.cb_bucket.create() log.info("Successfully created data buckets") self.cb_create_bucket = True def run_workload(self, replicas, num_items=None, num_threads=None, run_in_bg=False): """ Running workload with pillow fight operator Args: replicas (int): Number of pods num_items (int): Number of items to be loaded to the cluster num_threads (int): Number of threads run_in_bg (bool) : Optional run IOs in background """ self.result = None log.info("Running IOs using Pillow-fight") if run_in_bg: executor = ThreadPoolExecutor(1) self.result = executor.submit( PillowFight.run_pillowfights, self, replicas=replicas, num_items=num_items, num_threads=num_threads, ) return self.result PillowFight.run_pillowfights(self, replicas=replicas, num_items=num_items, num_threads=num_threads) def analyze_run(self, skip_analyze=False): """ Analyzing the workload run logs Args: skip_analyze (bool): Option to skip logs analysis """ if not skip_analyze: log.info("Analyzing workload run logs..") PillowFight.analyze_all(self) def respin_couchbase_app_pod(self): """ Respin the couchbase app pod Returns: pod status """ app_pod_list = get_pod_name_by_pattern("cb-example", constants.COUCHBASE_OPERATOR) app_pod = app_pod_list[random.randint(0, len(app_pod_list) - 1)] log.info(f"respin pod {app_pod}") app_pod_obj = get_pod_obj(app_pod, namespace=constants.COUCHBASE_OPERATOR) app_pod_obj.delete(wait=True, force=False) wait_for_resource_state(resource=app_pod_obj, state=constants.STATUS_RUNNING, timeout=300) def get_couchbase_nodes(self): """ Get nodes that contain a couchbase app pod Returns: list: List of nodes """ app_pods_list = get_pod_name_by_pattern("cb-example", constants.COUCHBASE_OPERATOR) app_pod_objs = list() for pod in app_pods_list: app_pod_objs.append( get_pod_obj(pod, namespace=constants.COUCHBASE_OPERATOR)) log.info("Create a list of nodes that contain a couchbase app pod") nodes_set = set() for pod in app_pod_objs: log.info(f"pod {pod.name} located on " f"node {pod.get().get('spec').get('nodeName')}") nodes_set.add(pod.get().get("spec").get("nodeName")) return list(nodes_set) def teardown(self): """ Cleaning up the resources created during Couchbase deployment """ if self.cb_create_cb_secret: self.cb_secrets.delete() if self.cb_create_cb_cluster: self.cb_example.delete() if self.cb_create_bucket: self.cb_bucket.delete() self.subscription_yaml.delete() switch_to_project("default") self.ns_obj.delete_project(constants.COUCHBASE_OPERATOR) self.ns_obj.wait_for_delete(resource_name=constants.COUCHBASE_OPERATOR, timeout=90) PillowFight.cleanup(self) switch_to_default_rook_cluster_project()
class ElasticSearch(object): """ ElasticSearch Environment """ def __init__(self): """ Initializer function """ log.info("Initializing the Elastic-Search environment object") self.namespace = "elastic-system" self.eck_path = "https://download.elastic.co/downloads/eck/1.1.2" self.eck_file = "all-in-one.yaml" self.pvc = "ocs_ci/templates/app-pods/es-pvc.yaml" self.crd = "ocs_ci/templates/app-pods/esq.yaml" self.lspid = None # Creating some different types of OCP objects self.ocp = OCP(kind="pod", resource_name="elastic-operator-0", namespace=self.namespace) self.ns_obj = OCP(kind="namespace", namespace=self.namespace) self.es = OCP(resource_name="quickstart-es-http", namespace=self.namespace) self.elasticsearch = OCP(namespace=self.namespace, kind="elasticsearch") self.password = OCP( kind="secret", resource_name="quickstart-es-elastic-user", namespace=self.namespace, ) # Fetch the all-in-one.yaml from the official repository self._get_eck_file() # Deploy the ECK all-in-one.yaml file self._deploy_eck() # Deploy the Elastic-Search server self._deploy_es() # Verify that ES is Up & Running timeout = 600 while timeout > 0: if self.get_health(): log.info("The ElasticSearch server is ready !") break else: log.warning("The ElasticSearch server is not ready yet") log.info("going to sleep for 30 sec. before next check") time.sleep(30) timeout -= 30 # Starting LocalServer process - port forwarding self.local_server() # Connect to the server self.con = self._es_connect() def _get_eck_file(self): """ Getting the ECK file from the official Elasticsearch web site and store it as a temporary file. Current version is 1.1.2, this need to be update with new versions, after testing it, and also it may need to update the CRD file (esq.yaml) with the new version as well. """ self.dir = tempfile.mkdtemp(prefix="elastic-system_") src_file = f"{self.eck_path}/{self.eck_file}" trg_file = f"{self.dir}/{self.eck_file}" log.info(f"Retrieving the ECK CR file from {src_file} into {trg_file}") try: urllib.request.urlretrieve(src_file, trg_file) except urllib.error.HTTPError as e: log.error(f"Can not connect to {src_file} : {e}") raise e def _deploy_eck(self): """ Deploying the ECK environment for the Elasticsearch, and make sure it is in Running mode """ log.info("Deploying the ECK environment for the ES cluster") self.ocp.apply(f"{self.dir}/{self.eck_file}") for es_pod in TimeoutSampler(300, 10, get_pod_name_by_pattern, "elastic-operator", self.namespace): try: if es_pod[0] is not None: self.eckpod = es_pod[0] log.info(f"The ECK pod {self.eckpod} is ready !") break except IndexError: log.info("ECK operator pod not ready yet") def get_ip(self): """ This function return the IP address of the Elasticsearch cluster. this IP is to use inside the OCP cluster Return str : String that represent the Ip Address. """ return self.es.get()["spec"]["clusterIP"] def get_port(self): """ This function return the port of the Elasticsearch cluster. Return str : String that represent the port. 
""" return self.es.get()["spec"]["ports"][0]["port"] def _deploy_es(self): log.info("Deploy the PVC for the ElasticSearch cluster") self.ocp.apply(self.pvc) log.info("Deploy the ElasticSearch cluster") self.ocp.apply(self.crd) for es_pod in TimeoutSampler(300, 20, get_pod_name_by_pattern, "quickstart-es-default", self.namespace): try: if es_pod[0] is not None: self.espod = es_pod[0] log.info(f"The ElasticSearch pod {self.espod} Started") break except IndexError: log.info("elasticsearch pod not ready yet") es_pod = OCP(kind="pod", namespace=self.namespace) log.info("Waiting for ElasticSearch to Run") assert es_pod.wait_for_resource( condition=constants.STATUS_RUNNING, resource_name=self.espod, sleep=30, timeout=600, ) log.info("Elastic Search is ready !!!") def get_health(self): """ This method return the health status of the Elasticsearch. Returns: bool : True if the status is green (OK) otherwise - False """ return self.elasticsearch.get( )["items"][0]["status"]["health"] == "green" def get_password(self): """ This method return the password used to connect the Elasticsearch. Returns: str : The password as text """ return base64.b64decode( self.password.get()["data"]["elastic"]).decode("utf-8") def cleanup(self): """ Cleanup the environment from all Elasticsearch components, and from the port forwarding process. """ log.info("Teardown the Elasticsearch environment") log.info(f"Killing the local server process ({self.lspid})") os.kill(self.lspid, signal.SIGKILL) log.info("Deleting all resources") subprocess.run(f"oc delete -f {self.crd}", shell=True) subprocess.run(f"oc delete -f {self.eck_file}", shell=True, cwd=self.dir) self.ns_obj.wait_for_delete(resource_name=self.namespace) def local_server(self): """ Starting sub-process that will do port-forwarding, to allow access from outside the open-shift cluster into the Elasticsearch server. """ cmd = f"oc -n {self.namespace } " cmd += f"port-forward service/quickstart-es-http {self.get_port()}" log.info(f"Going to run : {cmd}") proc = subprocess.Popen(cmd, shell=True) log.info(f"Starting LocalServer with PID of {proc.pid}") self.lspid = proc.pid def _es_connect(self): """ Create a connection to the ES via the localhost port-fwd Returns: Elasticsearch: elasticsearch connection object Raise: ConnectionError: if can not connect to the server """ try: es = Elasticsearch([{ "host": "localhost", "port": self.get_port() }]) except esexp.ConnectionError: log.error("Can not connect to ES server in the LocalServer") raise return es def get_indices(self): """ Getting list of all indices in the ES server - all created by the test, the installation of the ES was without any indexes pre-installed. Returns: list : list of all indices defined in the ES server """ results = [] log.info("Getting all indices") for ind in self.con.indices.get_alias("*"): results.append(ind) return results def _copy(self, es): """ Copy All data from the internal ES server to the main ES Args: es (obj): elasticsearch object which connected to the main ES """ query = {"size": 1000, "query": {"match_all": {}}} for ind in self.get_indices(): log.info(f"Reading {ind} from internal ES server") try: result = self.con.search(index=ind, body=query) except esexp.NotFoundError: log.warning(f"{ind} Not found in the Internal ES.") continue log.debug(f"The results from internal ES for {ind} are :{result}") log.info(f"Writing {ind} into main ES server") for doc in result["hits"]["hits"]: log.debug(f"Going to write : {doc}") es.index(index=ind, doc_type="_doc", body=doc["_source"])
class AMQ(object): """ Workload operation using AMQ """ def __init__(self, **kwargs): """ Initializer function Args: kwargs (dict): Following kwargs are valid namespace: namespace for the operator repo: AMQ repo where all necessary yaml file are there - a github link branch: branch to use from the repo """ self.args = kwargs self.repo = self.args.get('repo', constants.OCS_WORKLOADS) self.branch = self.args.get('branch', 'master') self.namespace = self.args.get('namespace', 'my-project') self.amq_is_setup = False self.ocp = OCP() self.ns_obj = OCP(kind='namespace') self.pod_obj = OCP(kind='pod') self.kafka_obj = OCP(kind='Kafka') self.kafka_connect_obj = OCP(kind="KafkaConnect") self.kafka_bridge_obj = OCP(kind="KafkaBridge") self._create_namespace() self._clone_amq() def _create_namespace(self): """ create namespace for amq """ self.ocp.new_project(self.namespace) def _clone_amq(self): """ clone the amq repo """ self.dir = tempfile.mkdtemp(prefix='amq_') try: log.info(f'cloning amq in {self.dir}') git_clone_cmd = f'git clone -b {self.branch} {self.repo} ' run( git_clone_cmd, shell=True, cwd=self.dir, check=True ) self.amq_dir = "ocs-workloads/amq/v1/install/cluster-operator" self.amq_dir_examples = "ocs-workloads/amq/v1/examples/templates/cluster-operator" self.amq_kafka_pers_yaml = "ocs-workloads/amq/v1/kafka-persistent.yaml" self.amq_kafka_connect_yaml = "ocs-workloads/amq/v1/kafka-connect.yaml" self.amq_kafka_bridge_yaml = "ocs-workloads/amq/v1/kafka-bridge.yaml" except (CommandFailed, CalledProcessError)as cf: log.error('Error during cloning of amq repository') raise cf def setup_amq_cluster_operator(self): """ Function to setup amq-cluster_operator, the file file is pulling from github it will make sure cluster-operator pod is running """ # self.amq_dir = constants.TEMPLATE_DEPLOYMENT_AMQ_CP run(f'oc apply -f {self.amq_dir} -n {self.namespace}', shell=True, check=True, cwd=self.dir) time.sleep(5) # Wait for strimzi-cluster-operator pod to be created if self.is_amq_pod_running(pod_pattern="cluster-operator"): log.info("strimzi-cluster-operator pod is in running state") else: raise ResourceWrongStatusException("strimzi-cluster-operator pod is not getting to running state") run(f'oc apply -f {self.amq_dir_examples} -n {self.namespace}', shell=True, check=True, cwd=self.dir) # checking pod status one more time if self.is_amq_pod_running(pod_pattern="cluster-operator"): log.info("strimzi-cluster-operator pod is in running state") else: raise ResourceWrongStatusException("strimzi-cluster-operator pod is not getting to running state") def is_amq_pod_running(self, pod_pattern="cluster-operator"): """ The function checks if provided pod_pattern finds a pod and if the status is running or not Args: pod_pattern (str): the pattern for pod Returns: bool: status of pod: True if found pod is running """ for pod in TimeoutSampler( 300, 10, get_pod_name_by_pattern, pod_pattern, self.namespace ): try: if pod[0] is not None: amq_pod = pod[0] break except IndexError as ie: log.error(pod_pattern + " pod not ready yet") raise ie # checking pod status if (self.pod_obj.wait_for_resource( condition='Running', resource_name=amq_pod, timeout=1600, sleep=30, ) ): log.info(amq_pod + " pod is up and running") return True else: return False def setup_amq_kafka_persistent(self): """ Function to setup amq-kafka-persistent, the file file is pulling from github it will make kind: Kafka and will make sure the status is running :return: kafka_persistent """ try: kafka_persistent = 
templating.load_yaml(os.path.join(self.dir, self.amq_kafka_pers_yaml)) self.kafka_persistent = OCS(**kafka_persistent) self.kafka_persistent.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during setup of AMQ Kafka-persistent') raise cf time.sleep(5) if self.is_amq_pod_running(pod_pattern="zookeeper"): return self.kafka_persistent else: raise ResourceWrongStatusException("my-cluster-zookeeper Pod is not getting to running state") def setup_amq_kafka_connect(self): """ The function is to setup amq-kafka-connect, the yaml file is pulled from github, it will create kind: KafkaConnect and make sure the status is running Returns: kafka_connect object """ try: kafka_connect = templating.load_yaml(os.path.join(self.dir, self.amq_kafka_connect_yaml)) self.kafka_connect = OCS(**kafka_connect) self.kafka_connect.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during setup of AMQ KafkaConnect') raise cf if self.is_amq_pod_running(pod_pattern="my-connect-cluster-connect"): return self.kafka_connect else: raise ResourceWrongStatusException("my-connect-cluster-connect pod is not getting to running state") def setup_amq_kafka_bridge(self): """ Function to setup amq-kafka-bridge, the file is pulled from github, it will create kind: KafkaBridge and make sure the pod status is running Return: kafka_bridge object """ try: kafka_bridge = templating.load_yaml(os.path.join(self.dir, self.amq_kafka_bridge_yaml)) self.kafka_bridge = OCS(**kafka_bridge) self.kafka_bridge.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during setup of AMQ KafkaBridge') raise cf # Making sure the kafka_bridge is running if self.is_amq_pod_running(pod_pattern="my-bridge-bridge"): return self.kafka_bridge else: raise ResourceWrongStatusException("kafka_bridge pod is not getting to running state") def setup_amq(self): """ Setup AMQ from the local folder; calls all necessary sub-functions to make sure the amq installation is complete """ self.setup_amq_cluster_operator() self.setup_amq_kafka_persistent() self.setup_amq_kafka_connect() self.setup_amq_kafka_bridge() self.amq_is_setup = True return self def cleanup(self): """ Clean up function: deletes the amq cluster operator, then the connector, persistent cluster and bridge, and at the end deletes the created namespace """ if self.amq_is_setup: self.kafka_persistent.delete() self.kafka_connect.delete() self.kafka_bridge.delete() run_cmd(f'oc delete -f {self.amq_dir}', shell=True, check=True, cwd=self.dir) run_cmd(f'oc delete -f {self.amq_dir_examples}', shell=True, check=True, cwd=self.dir) run_cmd(f'oc delete project {self.namespace}') # Reset namespace to default switch_to_default_rook_cluster_project() self.ns_obj.wait_for_delete(resource_name=self.namespace)
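# Illustrative usage sketch (not part of the module): setup_amq() chains the
# cluster-operator, Kafka-persistent, KafkaConnect and KafkaBridge steps above
# in order, and cleanup() removes them together with the namespace. The class
# defaults (namespace 'my-project', master branch of the ocs-workloads repo) are
# assumed to be acceptable here.
def _example_amq_workflow():
    amq = AMQ()
    amq.setup_amq()
    assert amq.is_amq_pod_running(pod_pattern="cluster-operator")
    amq.cleanup()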
class RipSaw(object): """ Workload operation using RipSaw """ def __init__(self, **kwargs): """ Initializer function Args: kwargs (dict): Following kwargs are valid repo: Ripsaw repo to used - a github link branch: branch to use from the repo namespace: namespace for the operator Example Usage: r1 = RipSaw() r1.apply_crd(crd='ripsaw_v1alpha1_ripsaw_crd.yaml') # use oc apply to apply custom modified bench my_custom_bench = my_custom_bench.yaml run_cmd('oc apply -f my_custom_bench') """ self.args = kwargs self.repo = self.args.get( "repo", "https://github.com/cloud-bulldozer/benchmark-operator") self.branch = self.args.get("branch", "master") self.namespace = self.args.get("namespace", RIPSAW_NAMESPACE) self.pgsql_is_setup = False self.ocp = OCP() self.ns_obj = OCP(kind="namespace") self.pod_obj = OCP(namespace=RIPSAW_NAMESPACE, kind="pod") self._create_namespace() self._clone_ripsaw() self.worker_nodes = [node.name for node in get_nodes()] helpers.label_worker_node(self.worker_nodes, label_key="kernel-cache-dropper", label_value="yes") def _create_namespace(self): """ create namespace for RipSaw """ self.ocp.new_project(self.namespace) def _clone_ripsaw(self): """ clone the ripaw repo """ self.dir = tempfile.mkdtemp(prefix="ripsaw_") try: log.info(f"cloning ripsaw in {self.dir}") git_clone_cmd = f"git clone -b {self.branch} {self.repo} " run(git_clone_cmd, shell=True, cwd=self.dir, check=True) self.crd = "resources/crds/" self.operator = "resources/operator.yaml" except (CommandFailed, CalledProcessError) as cf: log.error("Error during cloning of ripsaw repository") raise cf def apply_crd(self, crd): """ Apply the CRD Args: crd (str): Name of file to apply """ self.dir += "/benchmark-operator" run("oc apply -f deploy", shell=True, check=True, cwd=self.dir) run(f"oc apply -f {crd}", shell=True, check=True, cwd=self.dir) run(f"oc apply -f {self.operator}", shell=True, check=True, cwd=self.dir) run( "oc create -f resources/kernel-cache-drop-clusterrole.yaml", shell=True, check=True, cwd=self.dir, ) def get_uuid(self, benchmark): """ Getting the UUID of the test. when ripsaw used for running a benchmark tests, each run get its own UUID, so the results in the elastic-search server can be sorted. Args: benchmark (str): the name of the main pod in the test Return: str: the UUID of the test """ count = 0 while count <= 5: try: output = self.pod_obj.exec_oc_cmd(f"exec {benchmark} -- env") break except CommandFailed: time.sleep(3) count += 1 uuid = "" if output: for line in output.split(): if "uuid=" in line: uuid = line.split("=")[1] break log.info(f"The UUID of the test is : {uuid}") else: log.error(f"Can not get the UUID from {benchmark}") return uuid def cleanup(self): run(f"oc delete -f {self.crd}", shell=True, cwd=self.dir) run(f"oc delete -f {self.operator}", shell=True, cwd=self.dir) run("oc delete -f deploy", shell=True, cwd=self.dir) run_cmd(f"oc delete project {self.namespace}") run( "oc delete -f resources/kernel-cache-drop-clusterrole.yaml", shell=True, check=True, cwd=self.dir, ) self.ns_obj.wait_for_delete(resource_name=self.namespace, timeout=180) # Reset namespace to default switch_to_default_rook_cluster_project() helpers.remove_label_from_worker_node(self.worker_nodes, label_key="kernel-cache-dropper")
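# Illustrative usage sketch (not part of the module), mirroring the example in the
# class docstring. The custom benchmark file and the benchmark pod name below are
# placeholders - the real names depend on the benchmark CR being applied.
def _example_ripsaw_workflow():
    ripsaw = RipSaw()
    ripsaw.apply_crd(crd="resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")  # placeholder CRD path
    run_cmd("oc apply -f my_custom_bench.yaml")  # placeholder benchmark CR
    uuid = ripsaw.get_uuid(benchmark="my-benchmark-pod")  # placeholder pod name
    log.info(f"benchmark run UUID: {uuid}")
    ripsaw.cleanup()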
class ElasticSearch(object): """ ElasticSearch Environment """ def __init__(self, **kwargs): """ Initializer function """ log.info("Initializing the Elastic-Search environment object") self.args = kwargs self.namespace = "elastic-system" self.repo = self.args.get("repo", constants.OCS_WORKLOADS) self.branch = self.args.get("branch", "master") self.dir = tempfile.mkdtemp(prefix="eck_") # Clone the ECK repo locally self._clone() self.eck_path = os.path.join(self.dir, "ocs-workloads/eck") self.eck_file = os.path.join(self.eck_path, "crds.yaml") self.dumper_file = os.path.join(constants.TEMPLATE_APP_POD_DIR, "esclient.yaml") self.crd = os.path.join(constants.TEMPLATE_APP_POD_DIR, "esq.yaml") # Creating some different types of OCP objects self.ocp = OCP(kind="pod", resource_name="elastic-operator-0", namespace=self.namespace) self.ns_obj = OCP(kind="namespace", namespace=self.namespace) self.es = OCP(resource_name="quickstart-es-http", namespace=self.namespace) self.elasticsearch = OCP(namespace=self.namespace, kind="elasticsearch") self.password = OCP( kind="secret", resource_name="quickstart-es-elastic-user", namespace=self.namespace, ) # Deploy the ECK all-in-one.yaml file self._deploy_eck() # Deploy the Elastic-Search server self._deploy_es() # Verify that ES is Up & Running sample = TimeoutSampler(timeout=180, sleep=10, func=self.get_health) if not sample.wait_for_func_status(True): raise Exception("Elasticsearch deployment Failed") # Deploy the elasticsearch dumper pod self._deploy_data_dumper_client() # Connect to the server self.con = self._es_connect() def _clone(self): """ clone the ECK repo into temp directory """ try: log.info(f"Cloning ECK in {self.dir}") git_clone_cmd = f"git clone -b {self.branch} {self.repo} --depth 1" run(git_clone_cmd, shell=True, cwd=self.dir, check=True) except (CommandFailed, CalledProcessError) as cf: log.error("Error during cloning of ECK repository") raise cf def _pod_is_found(self, pattern): """ Boolean function which check if pod (by pattern) is exist. Args: pattern (str): the pattern of the pod to look for Returns: bool : True if pod found, otherwise False """ return len(get_pod_name_by_pattern(pattern, self.namespace)) > 0 def _deploy_eck(self): """ Deploying the ECK environment for the Elasticsearch, and make sure it is in Running mode """ log.info("Deploying the ECK environment for the ES cluster") log.info("Deploy the ECK CRD's") self.ocp.apply(self.eck_file) log.info("deploy the ECK operator") self.ocp.apply(f"{self.eck_path}/operator.yaml") sample = TimeoutSampler(timeout=300, sleep=10, func=self._pod_is_found, pattern="elastic-operator") if not sample.wait_for_func_status(True): err_msg = "ECK deployment Failed" log.error(err_msg) self.cleanup() raise Exception(err_msg) log.info("The ECK pod is ready !") def _deploy_data_dumper_client(self): """ Deploying elastic search client pod with utility which dump all the data from the server to .tgz file """ log.info("Deploying the es client for dumping all data") self.ocp.apply(self.dumper_file) sample = TimeoutSampler(timeout=300, sleep=10, func=self._pod_is_found, pattern="es-dumper") if not sample.wait_for_func_status(True): self.cleanup() raise Exception("Dumper pod deployment Failed") self.dump_pod = get_pod_name_by_pattern("es-dumper", self.namespace)[0] log.info(f"The dumper client pod {self.dump_pod} is ready !") def get_ip(self): """ This function return the IP address of the Elasticsearch cluster. this IP is to use inside the OCP cluster Return str : String that represent the Ip Address. 
""" return self.es.get()["spec"]["clusterIP"] def get_port(self): """ This function return the port of the Elasticsearch cluster. Return str : String that represent the port. """ return self.es.get()["spec"]["ports"][0]["port"] def _deploy_es(self): """ Deploying the Elasticsearch server """ # Creating PVC for the elasticsearch server and wait until it bound log.info("Creating 10 GiB PVC for the ElasticSearch cluster on") self.pvc_obj = create_pvc( sc_name=constants.CEPHBLOCKPOOL_SC, namespace=self.namespace, pvc_name="elasticsearch-data-quickstart-es-default-0", access_mode=constants.ACCESS_MODE_RWO, size="10Gi", ) wait_for_resource_state(self.pvc_obj, constants.STATUS_BOUND) self.pvc_obj.reload() log.info("Deploy the ElasticSearch cluster") self.ocp.apply(self.crd) sample = TimeoutSampler( timeout=300, sleep=10, func=self._pod_is_found, pattern="quickstart-es-default", ) if not sample.wait_for_func_status(True): self.cleanup() raise Exception("The ElasticSearch pod deployment Failed") self.espod = get_pod_name_by_pattern("quickstart-es-default", self.namespace)[0] log.info(f"The ElasticSearch pod {self.espod} Started") es_pod = OCP(kind="pod", namespace=self.namespace) log.info("Waiting for ElasticSearch to Run") assert es_pod.wait_for_resource( condition=constants.STATUS_RUNNING, resource_name=self.espod, sleep=30, timeout=600, ) log.info("Elastic Search is ready !!!") def get_health(self): """ This method return the health status of the Elasticsearch. Returns: bool : True if the status is green (OK) otherwise - False """ return self.elasticsearch.get( )["items"][0]["status"]["health"] == "green" def get_password(self): """ This method return the password used to connect the Elasticsearch. Returns: str : The password as text """ return base64.b64decode( self.password.get()["data"]["elastic"]).decode("utf-8") def cleanup(self): """ Cleanup the environment from all Elasticsearch components, and from the port forwarding process. """ log.info("Teardown the Elasticsearch environment") log.info("Deleting all resources") log.info("Deleting the dumper client pod") self.ocp.delete(yaml_file=self.dumper_file) log.info("Deleting the es resource") self.ocp.delete(yaml_file=self.crd) log.info("Deleting the es project") # self.ns_obj.delete_project(project_name=self.namespace) self.ocp.delete(f"{self.eck_path}/operator.yaml") self.ocp.delete(yaml_file=self.eck_file) self.ns_obj.wait_for_delete(resource_name=self.namespace, timeout=180) def _es_connect(self): """ Create a connection to the local ES Returns: Elasticsearch: elasticsearch connection object, None if Cannot connect to ES """ try: es = Elasticsearch([{ "host": self.get_ip(), "port": self.get_port() }]) except esexp.ConnectionError: log.warning("Cannot connect to ES server in the LocalServer") es = None return es def get_indices(self): """ Getting list of all indices in the ES server - all created by the test, the installation of the ES was without any indexes pre-installed. Returns: list : list of all indices defined in the ES server """ results = [] log.info("Getting all indices") for ind in self.con.indices.get_alias("*"): results.append(ind) return results def dumping_all_data(self, target_path): """ Dump All data from the internal ES server to .tgz file. 
Args: target_path (str): the path where the results file will be copied into Returns: bool: True if the dump operation succeeded and the results were copied back to the host, otherwise False """ log.info("dumping data from ES server to .tgz file") rsh_cmd = f"rsh {self.dump_pod} /elasticsearch-dump/esdumper.py --ip {self.get_ip()} --port {self.get_port()}" result = self.ocp.exec_oc_cmd(rsh_cmd, out_yaml_format=False, timeout=1200) if "ES dump is done." not in result: log.error("There is no data in the Elasticsearch server") return False else: src_file = result.split()[-1] log.info(f"Copy {src_file} from the client pod") cp_command = f"cp {self.dump_pod}:{src_file} {target_path}/FullResults.tgz" result = self.ocp.exec_oc_cmd(cp_command, timeout=120) log.info(f"The output from the POD is {result}") log.info("Extracting the FullResults.tgz file") kwargs = {"cwd": target_path} results = run_command(f"tar zxvf {target_path}/FullResults.tgz", **kwargs) log.debug(f"The untar results are {results}") if "Error in command" in results: log.warning("Cannot untar the dumped file") return False return True
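# Illustrative usage sketch (not part of the module): deploy the ES environment
# above, dump whatever it collected into a local directory and tear it down.
# tempfile is assumed to be imported at module level (other classes here use it).
def _example_es_dump_workflow():
    es_env = ElasticSearch()
    target_dir = tempfile.mkdtemp(prefix="es_results_")
    if es_env.dumping_all_data(target_path=target_dir):
        log.info(f"ES data extracted under {target_dir}")
    else:
        log.warning("ES dump failed or the server holds no data")
    es_env.cleanup()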
class RipSaw(object): """ Workload operation using RipSaw """ def __init__(self, **kwargs): """ Initializer function Args: kwargs (dict): Following kwargs are valid repo: Ripsaw repo to used - a github link branch: branch to use from the repo namespace: namespace for the operator Example Usage: r1 = RipSaw() r1.apply_crd(crd='ripsaw_v1alpha1_ripsaw_crd.yaml') # use oc apply to apply custom modified bench my_custom_bench = my_custom_bench.yaml run_cmd('oc apply -f my_custom_bench') """ self.args = kwargs self.repo = self.args.get('repo', 'https://github.com/cloud-bulldozer/ripsaw') self.branch = self.args.get('branch', 'master') self.namespace = self.args.get('namespace', 'my-ripsaw') self.pgsql_is_setup = False self.ocp = OCP() self.ns_obj = OCP(kind='namespace') self.pod_obj = OCP(kind='pod') self._create_namespace() self._clone_ripsaw() def _create_namespace(self): """ create namespace for RipSaw """ self.ocp.new_project(self.namespace) def _clone_ripsaw(self): """ clone the ripaw repo """ self.dir = tempfile.mkdtemp(prefix='ripsaw_') try: log.info(f'cloning ripsaw in {self.dir}') git_clone_cmd = f'git clone -b {self.branch} {self.repo} ' run(git_clone_cmd, shell=True, cwd=self.dir, check=True) self.crd = 'resources/crd/' self.operator = 'resources/operator.yaml' except (CommandFailed, CalledProcessError) as cf: log.error('Error during cloning of ripsaw repository') raise cf def apply_crd(self, crd): """ Apply the CRD Args: crd (str): Name of file to apply """ self.crd = crd self.dir += '/ripsaw' run(f'oc apply -f deploy', shell=True, check=True, cwd=self.dir) run(f'oc apply -f {crd}', shell=True, check=True, cwd=self.dir) run(f'oc apply -f {self.operator}', shell=True, check=True, cwd=self.dir) def setup_postgresql(self): """ Deploy postgres sql server """ try: pgsql_service = templating.load_yaml(constants.PGSQL_SERVICE_YAML) pgsql_cmap = templating.load_yaml(constants.PGSQL_CONFIGMAP_YAML) pgsql_sset = templating.load_yaml(constants.PGSQL_STATEFULSET_YAML) self.pgsql_service = OCS(**pgsql_service) self.pgsql_service.create() self.pgsql_cmap = OCS(**pgsql_cmap) self.pgsql_cmap.create() self.pgsql_sset = OCS(**pgsql_sset) self.pgsql_sset.create() self.pod_obj.wait_for_resource(condition='Running', selector='app=postgres', timeout=120) except (CommandFailed, CalledProcessError) as cf: log.error('Failed during setup of PostgreSQL server') raise cf self.pgsql_is_setup = True def cleanup(self): run(f'oc delete -f {self.crd}', shell=True, cwd=self.dir) run(f'oc delete -f {self.operator}', shell=True, cwd=self.dir) run(f'oc delete -f deploy', shell=True, cwd=self.dir) if self.pgsql_is_setup: self.pgsql_sset.delete() self.pgsql_cmap.delete() self.pgsql_service.delete() run_cmd(f'oc delete project {self.namespace}') self.ns_obj.wait_for_delete(resource_name=self.namespace) # Reset namespace to default switch_to_default_rook_cluster_project()
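# Illustrative usage sketch (not part of the module) for the PostgreSQL variant
# above: the CRD file name is a placeholder, everything else uses only methods
# defined in the class.
def _example_ripsaw_pgsql_workflow():
    ripsaw = RipSaw()
    ripsaw.apply_crd(crd="resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")  # placeholder
    ripsaw.setup_postgresql()
    ripsaw.cleanup()  # also removes the PostgreSQL service, configmap and statefulset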
class AMQ(object): """ Workload operation using AMQ """ def __init__(self, **kwargs): """ Initializer function Args: kwargs (dict): Following kwargs are valid namespace: namespace for the operator repo: AMQ repo where all necessary yaml file are there - a github link branch: branch to use from the repo """ self.args = kwargs self.repo = self.args.get("repo", constants.KAFKA_OPERATOR) self.branch = self.args.get("branch", "master") self.ocp = OCP() self.ns_obj = OCP(kind="namespace") self.pod_obj = OCP(kind="pod") self.kafka_obj = OCP(kind="Kafka") self.kafka_connect_obj = OCP(kind="KafkaConnect") self.kafka_bridge_obj = OCP(kind="KafkaBridge") self.kafka_topic_obj = OCP(kind="KafkaTopic") self.kafka_user_obj = OCP(kind="KafkaUser") self.amq_is_setup = False self.messaging = False self.benchmark = False self.consumer_pod = self.producer_pod = None self.kafka_topic = self.kafka_user = None self.kafka_connect = self.kafka_bridge = self.kafka_persistent = None self.dir = tempfile.mkdtemp(prefix="amq_") self._clone_amq() def _clone_amq(self): """ clone the amq repo """ try: log.info(f"cloning amq in {self.dir}") git_clone_cmd = f"git clone {self.repo} " run(git_clone_cmd, shell=True, cwd=self.dir, check=True) self.amq_dir = "strimzi-kafka-operator/packaging/install/cluster-operator/" self.amq_kafka_pers_yaml = ( "strimzi-kafka-operator/packaging/examples/kafka/kafka-persistent.yaml" ) self.amq_kafka_connect_yaml = ( "strimzi-kafka-operator/packaging/examples/connect/kafka-connect.yaml" ) self.amq_kafka_bridge_yaml = ( "strimzi-kafka-operator/packaging/examples/bridge/kafka-bridge.yaml" ) self.kafka_topic_yaml = ( "strimzi-kafka-operator/packaging/examples/topic/kafka-topic.yaml" ) self.kafka_user_yaml = ( "strimzi-kafka-operator/packaging/examples/user/kafka-user.yaml" ) self.hello_world_producer_yaml = constants.HELLO_WORLD_PRODUCER_YAML self.hello_world_consumer_yaml = constants.HELLO_WORLD_CONSUMER_YAML except (CommandFailed, CalledProcessError) as cf: log.error("Error during cloning of amq repository") raise cf def create_namespace(self, namespace): """ create namespace for amq Args: namespace (str): Namespace for amq pods """ self.ocp.new_project(namespace) def setup_amq_cluster_operator(self, namespace=constants.AMQ_NAMESPACE): """ Function to setup amq-cluster_operator, the file is pulling from github it will make sure cluster-operator pod is running Args: namespace (str): Namespace for AMQ pods """ # Namespace for amq try: self.create_namespace(namespace) except CommandFailed as ef: if f'project.project.openshift.io "{namespace}" already exists' not in str( ef ): raise ef # Create strimzi-cluster-operator pod run( f"for i in `(ls strimzi-kafka-operator/packaging/install/cluster-operator/)`;" f"do sed 's/{namespace}/myproject/g' " f"strimzi-kafka-operator/packaging/install/cluster-operator/$i;done", shell=True, check=True, cwd=self.dir, ) self.strimzi_kafka_operator = os.path.join(self.dir, self.amq_dir) pf_files = os.listdir(self.strimzi_kafka_operator) crds = [] for crd in pf_files: crds.append(crd) self.crd_objects = [] for adm_yaml in crds: try: adm_data = templating.load_yaml(self.strimzi_kafka_operator + adm_yaml) adm_obj = OCS(**adm_data) adm_obj.create() self.crd_objects.append(adm_obj) except (CommandFailed, CalledProcessError) as cfe: if "Error is Error from server (AlreadyExists):" in str(cfe): log.warn( "Some amq leftovers are present, please cleanup the cluster" ) pytest.skip( "AMQ leftovers are present needs to cleanup the cluster" ) time.sleep(30) # Check 
strimzi-cluster-operator pod created if self.is_amq_pod_running(pod_pattern="cluster-operator", expected_pods=1): log.info("strimzi-cluster-operator pod is in running state") else: raise ResourceWrongStatusException( "strimzi-cluster-operator pod is not getting to running state" ) def is_amq_pod_running( self, pod_pattern, expected_pods, namespace=constants.AMQ_NAMESPACE ): """ The function checks if provided pod_pattern finds a pod and if the status is running or not Args: pod_pattern (str): the pattern for pod expected_pods (int): Number of pods namespace (str): Namespace for amq pods Returns: bool: status of pod: True if found pod is running """ _rc = True for pod in TimeoutSampler( 300, 10, get_pod_name_by_pattern, pod_pattern, namespace ): try: if pod is not None and len(pod) == expected_pods: amq_pod = pod break except IndexError as ie: log.error(" pod not ready yet") raise ie # checking pod status for pod in amq_pod: if self.pod_obj.wait_for_resource( condition="Running", resource_name=pod, timeout=1600, sleep=30, ): log.info(f"{pod} pod is up and running") else: _rc = False log.error(f"{pod} pod is not running") return _rc def setup_amq_kafka_persistent(self, sc_name, size=100, replicas=3): """ Function to setup amq-kafka-persistent, the file is pulling from github it will make kind: Kafka and will make sure the status is running Args: sc_name (str): Name of sc size (int): Size of the storage in Gi replicas (int): Number of kafka and zookeeper pods to be created return : kafka_persistent """ if storagecluster_independent_check(): sc_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD try: kafka_persistent = templating.load_yaml( os.path.join(self.dir, self.amq_kafka_pers_yaml) ) kafka_persistent["spec"]["kafka"]["replicas"] = replicas kafka_persistent["spec"]["kafka"]["storage"]["volumes"][0][ "class" ] = sc_name kafka_persistent["spec"]["kafka"]["storage"]["volumes"][0][ "size" ] = f"{size}Gi" kafka_persistent["spec"]["zookeeper"]["replicas"] = replicas kafka_persistent["spec"]["zookeeper"]["storage"]["class"] = sc_name kafka_persistent["spec"]["zookeeper"]["storage"]["size"] = f"{size}Gi" self.kafka_persistent = OCS(**kafka_persistent) self.kafka_persistent.create() except (CommandFailed, CalledProcessError) as cf: log.error("Failed during setup of AMQ Kafka-persistent") raise cf time.sleep(40) if self.is_amq_pod_running( pod_pattern="my-cluster", expected_pods=(replicas * 2) + 1 ): return self.kafka_persistent else: raise ResourceWrongStatusException( "my-cluster-kafka and my-cluster-zookeeper " "Pod is not getting to running state" ) def setup_amq_kafka_connect(self): """ The function is to setup amq-kafka-connect, the yaml file is pulling from github it will make kind: KafkaConnect and will make sure the status is running Returns: kafka_connect object """ try: kafka_connect = templating.load_yaml( os.path.join(self.dir, self.amq_kafka_connect_yaml) ) self.kafka_connect = OCS(**kafka_connect) self.kafka_connect.create() except (CommandFailed, CalledProcessError) as cf: log.error("Failed during setup of AMQ KafkaConnect") raise cf if self.is_amq_pod_running( pod_pattern="my-connect-cluster-connect", expected_pods=1 ): return self.kafka_connect else: raise ResourceWrongStatusException( "my-connect-cluster-connect pod is not getting to running state" ) def setup_amq_kafka_bridge(self): """ Function to setup amq-kafka, the file file is pulling from github it will make kind: KafkaBridge and will make sure the pod status is running Return: kafka_bridge object """ try: 
kafka_bridge = templating.load_yaml( os.path.join(self.dir, self.amq_kafka_bridge_yaml) ) self.kafka_bridge = OCS(**kafka_bridge) self.kafka_bridge.create() except (CommandFailed, CalledProcessError) as cf: log.error("Failed during setup of AMQ KafkaConnect") raise cf # Making sure the kafka_bridge is running if self.is_amq_pod_running(pod_pattern="my-bridge-bridge", expected_pods=1): return self.kafka_bridge else: raise ResourceWrongStatusException( "kafka_bridge_pod pod is not getting to running state" ) def create_kafka_topic(self, name="my-topic", partitions=1, replicas=1): """ Creates kafka topic Args: name (str): Name of the kafka topic partitions (int): Number of partitions replicas (int): Number of replicas Return: kafka_topic object """ try: kafka_topic = templating.load_yaml( os.path.join(self.dir, self.kafka_topic_yaml) ) kafka_topic["metadata"]["name"] = name kafka_topic["spec"]["partitions"] = partitions kafka_topic["spec"]["replicas"] = replicas self.kafka_topic = OCS(**kafka_topic) self.kafka_topic.create() except (CommandFailed, CalledProcessError) as cf: if f'kafkatopics.kafka.strimzi.io "{name}" already exists' not in str(cf): log.error("Failed during creating of Kafka topic") raise cf # Making sure kafka topic created if self.kafka_topic_obj.get(resource_name=name): return self.kafka_topic else: raise ResourceWrongStatusException("kafka topic is not created") def create_kafka_user(self, name="my-user"): """ Creates kafka user Args: name (str): Name of the kafka user Return: kafka_user object """ try: kafka_user = templating.load_yaml( os.path.join(self.dir, self.kafka_user_yaml) ) kafka_user["metadata"]["name"] = name self.kafka_user = OCS(**kafka_user) self.kafka_user.create() except (CommandFailed, CalledProcessError) as cf: log.error("Failed during creating of Kafka user") raise cf # Making sure kafka user created if self.kafka_user_obj.get(resource_name=name): return self.kafka_user else: raise ResourceWrongStatusException("kafka user is not created") def create_producer_pod(self, num_of_pods=1, value="10000"): """ Creates producer pods Args: num_of_pods (int): Number of producer pods to be created value (str): Number of the messages to be sent Returns: producer pod object """ try: producer_pod = templating.load_yaml(constants.HELLO_WORLD_PRODUCER_YAML) producer_pod["spec"]["replicas"] = num_of_pods producer_pod["spec"]["template"]["spec"]["containers"][0]["env"][4][ "value" ] = value self.producer_pod = OCS(**producer_pod) self.producer_pod.create() except (CommandFailed, CalledProcessError) as cf: log.error("Failed during creation of producer pod") raise cf # Making sure the producer pod is running if self.is_amq_pod_running( pod_pattern="hello-world-producer", expected_pods=num_of_pods ): return self.producer_pod else: raise ResourceWrongStatusException( "producer pod is not getting to running state" ) def create_consumer_pod(self, num_of_pods=1, value="10000"): """ Creates producer pods Args: num_of_pods (int): Number of consumer pods to be created value (str): Number of messages to be received Returns: consumer pod object """ try: consumer_pod = templating.load_yaml(constants.HELLO_WORLD_CONSUMER_YAML) consumer_pod["spec"]["replicas"] = num_of_pods consumer_pod["spec"]["template"]["spec"]["containers"][0]["env"][4][ "value" ] = value self.consumer_pod = OCS(**consumer_pod) self.consumer_pod.create() except (CommandFailed, CalledProcessError) as cf: log.error("Failed during creation of consumer pod") raise cf # Making sure the producer pod is running if 
self.is_amq_pod_running( pod_pattern="hello-world-consumer", expected_pods=num_of_pods ): return self.consumer_pod else: raise ResourceWrongStatusException( "consumer pod is not getting to running state" ) def validate_msg( self, pod, namespace=constants.AMQ_NAMESPACE, value="10000", since_time=1800 ): """ Validate if messages are sent or received Args: pod (str): Name of the pod namespace (str): Namespace of the pod value (str): Number of messages are sent since_time (int): Number of seconds to required to sent the msg Returns: bool : True if all messages are sent/received """ cmd = f"oc logs -n {namespace} {pod} --since={since_time}s" msg = run_cmd(cmd) substring = f"Hello world - {int(value) - 1}" if msg.find(substring) == -1: return False else: return True def validate_messages_are_produced( self, namespace=constants.AMQ_NAMESPACE, value="10000", since_time=1800 ): """ Validates if all messages are sent in producer pod Args: namespace (str): Namespace of the pod value (str): Number of messages are sent since_time (int): Number of seconds to required to sent the msg Raises exception on failures """ # ToDo: Support multiple topics and users producer_pod_objs = [ get_pod_obj(pod) for pod in get_pod_name_by_pattern("hello-world-produce", namespace) ] for pod in producer_pod_objs: for msg in TimeoutSampler( 900, 30, self.validate_msg, pod.name, namespace, value, since_time ): if msg: break assert msg, "Few messages are not sent by producer" log.info("Producer sent all messages") def validate_messages_are_consumed( self, namespace=constants.AMQ_NAMESPACE, value="10000", since_time=1800 ): """ Validates if all messages are received in consumer pod Args: namespace (str): Namespace of the pod value (str): Number of messages are recieved since_time (int): Number of seconds to required to receive the msg Raises exception on failures """ # ToDo: Support multiple topics and users consumer_pod_objs = [ get_pod_obj(pod) for pod in get_pod_name_by_pattern("hello-world-consumer", namespace) ] for pod in consumer_pod_objs: for msg in TimeoutSampler( 900, 30, self.validate_msg, pod.name, namespace, value, since_time ): if msg: break assert msg, "Consumer didn't receive all messages" log.info("Consumer received all messages") def run_in_bg( self, namespace=constants.AMQ_NAMESPACE, value="10000", since_time=1800 ): """ Validate messages are produced and consumed in bg Args: namespace (str): Namespace of the pod value (str): Number of messages to be sent and received since_time (int): Number of seconds to required to sent and receive msg """ # Todo: Check for each messages sent and received log.info("Running open messages on pod in bg") threads = [] executor = ThreadPoolExecutor(2) threads.append( executor.submit( self.validate_messages_are_produced, namespace, value, since_time ) ) threads.append( executor.submit( self.validate_messages_are_consumed, namespace, value, since_time ) ) return threads def run_amq_benchmark( self, benchmark_pod_name="benchmark", kafka_namespace=constants.AMQ_NAMESPACE, tiller_namespace=AMQ_BENCHMARK_NAMESPACE, num_of_clients=8, worker=None, timeout=1800, amq_workload_yaml=None, run_in_bg=False, ): """ Run benchmark pod and get the results Args: benchmark_pod_name (str): Name of the benchmark pod kafka_namespace (str): Namespace where kafka cluster created tiller_namespace (str): Namespace where tiller pod needs to be created num_of_clients (int): Number of clients to be created worker (str) : Loads to create on workloads separated with commas e.g 
http://benchmark-worker-0.benchmark-worker:8080, http://benchmark-worker-1.benchmark-worker:8080 timeout (int): Time to complete the run amq_workload_yaml (dict): Contains amq workloads information keys and values :name (str): Name of the workloads :topics (int): Number of topics created :partitions_per_topic (int): Number of partitions per topic :message_size (int): Message size :payload_file (str): Load to run on workload :subscriptions_per_topic (int): Number of subscriptions per topic :consumer_per_subscription (int): Number of consumers per subscription :producers_per_topic (int): Number of producers per topic :producer_rate (int): Producer rate :consumer_backlog_sizegb (int): Size of block in gb :test_duration_minutes (int): Time to run the workloads run_in_bg (bool): On true the workload will run in background Return: result (str/Thread obj): Returns benchmark run information if run_in_bg is False. Otherwise a thread of the amq workload execution """ # Namespace for to helm/tiller try: self.create_namespace(tiller_namespace) except CommandFailed as ef: if ( f'project.project.openshift.io "{tiller_namespace}" already exists' not in str(ef) ): raise ef # Create rbac file try: sa_tiller = list( templating.load_yaml(constants.AMQ_RBAC_YAML, multi_document=True) ) sa_tiller[0]["metadata"]["namespace"] = tiller_namespace sa_tiller[1]["subjects"][0]["namespace"] = tiller_namespace self.sa_tiller = OCS(**sa_tiller[0]) self.crb_tiller = OCS(**sa_tiller[1]) self.sa_tiller.create() self.crb_tiller.create() except (CommandFailed, CalledProcessError) as cf: log.error("Failed during creation of service account tiller") raise cf # Install helm cli (version v2.16.0 as we need tiller component) # And create tiller pods wget_cmd = f"wget -c --read-timeout=5 --tries=0 {URL}" untar_cmd = "tar -zxvf helm-v2.16.1-linux-amd64.tar.gz" tiller_cmd = ( f"linux-amd64/helm init --tiller-namespace {tiller_namespace}" f" --service-account {tiller_namespace}" ) exec_cmd(cmd=wget_cmd, cwd=self.dir) exec_cmd(cmd=untar_cmd, cwd=self.dir) exec_cmd(cmd=tiller_cmd, cwd=self.dir) # Validate tiller pod is running log.info("Waiting for 30s for tiller pod to come up") time.sleep(30) if self.is_amq_pod_running( pod_pattern="tiller", expected_pods=1, namespace=tiller_namespace ): log.info("Tiller pod is running") else: raise ResourceWrongStatusException("Tiller pod is not in running state") # Create benchmark pods log.info("Create benchmark pods") values = templating.load_yaml(constants.AMQ_BENCHMARK_VALUE_YAML) values["numWorkers"] = num_of_clients benchmark_cmd = ( f"linux-amd64/helm install {constants.AMQ_BENCHMARK_POD_YAML}" f" --name {benchmark_pod_name} --tiller-namespace {tiller_namespace}" ) exec_cmd(cmd=benchmark_cmd, cwd=self.dir) # Making sure the benchmark pod and clients are running if self.is_amq_pod_running( pod_pattern="benchmark", expected_pods=(1 + num_of_clients), namespace=tiller_namespace, ): log.info("All benchmark pod is up and running") else: raise ResourceWrongStatusException( "Benchmark pod is not getting to running state" ) # Update commonConfig with kafka-bootstrap server details driver_kafka = templating.load_yaml(constants.AMQ_DRIVER_KAFKA_YAML) driver_kafka[ "commonConfig" ] = f"bootstrap.servers=my-cluster-kafka-bootstrap.{kafka_namespace}.svc.cluster.local:9092" json_file = f"{self.dir}/driver_kafka" templating.dump_data_to_json(driver_kafka, json_file) cmd = f"cp {json_file} {benchmark_pod_name}-driver:/" self.pod_obj.exec_oc_cmd(cmd) # Update the workload yaml if not amq_workload_yaml: 
amq_workload_yaml = templating.load_yaml(constants.AMQ_WORKLOAD_YAML) yaml_file = f"{self.dir}/amq_workload.yaml" templating.dump_data_to_temp_yaml(amq_workload_yaml, yaml_file) cmd = f"cp {yaml_file} {benchmark_pod_name}-driver:/" self.pod_obj.exec_oc_cmd(cmd) self.benchmark = True # Run the benchmark if worker: cmd = f"bin/benchmark --drivers /driver_kafka --workers {worker} /amq_workload.yaml" else: cmd = "bin/benchmark --drivers /driver_kafka /amq_workload.yaml" log.info(f"Run benchmark and running command {cmd} inside the benchmark pod ") if run_in_bg: executor = ThreadPoolExecutor(1) result = executor.submit( self.run_amq_workload, cmd, benchmark_pod_name, tiller_namespace, timeout, ) return result pod_obj = get_pod_obj( name=f"{benchmark_pod_name}-driver", namespace=tiller_namespace ) result = pod_obj.exec_cmd_on_pod( command=cmd, out_yaml_format=False, timeout=timeout ) return result def run_amq_workload(self, command, benchmark_pod_name, tiller_namespace, timeout): """ Runs amq workload in bg Args: command (str): Command to run on pod benchmark_pod_name (str): Pod name tiller_namespace (str): Namespace of pod timeout (int): Time to complete the run Returns: result (str): Returns benchmark run information """ pod_obj = get_pod_obj( name=f"{benchmark_pod_name}-driver", namespace=tiller_namespace ) return pod_obj.exec_cmd_on_pod( command=command, out_yaml_format=False, timeout=timeout ) def validate_amq_benchmark( self, result, amq_workload_yaml, benchmark_pod_name="benchmark" ): """ Validates amq benchmark run Args: result (str): Benchmark run information amq_workload_yaml (dict): AMQ workload information benchmark_pod_name (str): Name of the benchmark pod Returns: res_dict (dict): Returns the dict output on success, Otherwise none """ res_dict = {} res_dict["topic"] = amq_workload_yaml["topics"] res_dict["partitionsPerTopic"] = amq_workload_yaml["partitionsPerTopic"] res_dict["messageSize"] = amq_workload_yaml["messageSize"] res_dict["payloadFile"] = amq_workload_yaml["payloadFile"] res_dict["subscriptionsPerTopic"] = amq_workload_yaml["subscriptionsPerTopic"] res_dict["producersPerTopic"] = amq_workload_yaml["producersPerTopic"] res_dict["consumerPerSubscription"] = amq_workload_yaml[ "consumerPerSubscription" ] res_dict["producerRate"] = amq_workload_yaml["producerRate"] # Validate amq benchmark is completed for part in result.split(): if ".json" in part: workload_json_file = part if workload_json_file: cmd = f"rsync {benchmark_pod_name}-driver:{workload_json_file} {self.dir} -n {AMQ_BENCHMARK_NAMESPACE}" self.pod_obj.exec_oc_cmd(command=cmd, out_yaml_format=False) # Parse the json file with open(f"{self.dir}/{workload_json_file}") as json_file: data = json.load(json_file) res_dict["AvgpublishRate"] = sum(data.get("publishRate")) / len( data.get("publishRate") ) res_dict["AvgConsumerRate"] = sum(data.get("consumeRate")) / len( data.get("consumeRate") ) res_dict["AvgMsgBacklog"] = sum(data.get("backlog")) / len( data.get("backlog") ) res_dict["publishLatencyAvg"] = sum(data.get("publishLatencyAvg")) / len( data.get("publishLatencyAvg") ) res_dict["aggregatedPublishLatencyAvg"] = data.get( "aggregatedPublishLatencyAvg" ) res_dict["aggregatedPublishLatency50pct"] = data.get( "aggregatedPublishLatency50pct" ) res_dict["aggregatedPublishLatency75pct"] = data.get( "aggregatedPublishLatency75pct" ) res_dict["aggregatedPublishLatency95pct"] = data.get( "aggregatedPublishLatency95pct" ) res_dict["aggregatedPublishLatency99pct"] = data.get( "aggregatedPublishLatency99pct" ) 
res_dict["aggregatedPublishLatency999pct"] = data.get( "aggregatedPublishLatency999pct" ) res_dict["aggregatedPublishLatency9999pct"] = data.get( "aggregatedPublishLatency9999pct" ) res_dict["aggregatedPublishLatencyMax"] = data.get( "aggregatedPublishLatencyMax" ) res_dict["aggregatedEndToEndLatencyAvg"] = data.get( "aggregatedEndToEndLatencyAvg" ) res_dict["aggregatedEndToEndLatency50pct"] = data.get( "aggregatedEndToEndLatency50pct" ) res_dict["aggregatedEndToEndLatency75pct"] = data.get( "aggregatedEndToEndLatency75pct" ) res_dict["aggregatedEndToEndLatency95pct"] = data.get( "aggregatedEndToEndLatency95pct" ) res_dict["aggregatedEndToEndLatency99pct"] = data.get( "aggregatedEndToEndLatency99pct" ) res_dict["aggregatedEndToEndLatency999pct"] = data.get( "aggregatedEndToEndLatency999pct" ) res_dict["aggregatedEndToEndLatency9999pct"] = data.get( "aggregatedEndToEndLatency9999pct" ) res_dict["aggregatedEndToEndLatencyMax"] = data.get( "aggregatedEndToEndLatencyMax" ) else: log.error("Benchmark didn't run completely") return None amq_benchmark_pod_table = PrettyTable(["key", "value"]) for key, val in res_dict.items(): amq_benchmark_pod_table.add_row([key, val]) log.info(f"\n{amq_benchmark_pod_table}\n") return res_dict def export_amq_output_to_gsheet(self, amq_output, sheet_name, sheet_index): """ Collect amq data to google spreadsheet Args: amq_output (dict): amq output in dict sheet_name (str): Name of the sheet sheet_index (int): Index of sheet """ # Collect data and export to Google doc spreadsheet g_sheet = GoogleSpreadSheetAPI(sheet_name=sheet_name, sheet_index=sheet_index) log.info("Exporting amq data to google spreadsheet") headers_to_key = [] values = [] for key, val in amq_output.items(): headers_to_key.append(key) values.append(val) # Update amq_result to gsheet g_sheet.insert_row(values, 2) g_sheet.insert_row(headers_to_key, 2) # Capturing versions(OCP, OCS and Ceph) and test run name g_sheet.insert_row( [ f"ocp_version:{utils.get_cluster_version()}", f"ocs_build_number:{utils.get_ocs_build_number()}", f"ceph_version:{utils.get_ceph_version()}", f"test_run_name:{utils.get_testrun_name()}", ], 2, ) def create_messaging_on_amq( self, topic_name="my-topic", user_name="my-user", partitions=1, replicas=1, num_of_producer_pods=1, num_of_consumer_pods=1, value="10000", ): """ Creates workload using Open Messaging tool on amq cluster Args: topic_name (str): Name of the topic to be created user_name (str): Name of the user to be created partitions (int): Number of partitions of topic replicas (int): Number of replicas of topic num_of_producer_pods (int): Number of producer pods to be created num_of_consumer_pods (int): Number of consumer pods to be created value (str): Number of messages to be sent and received """ self.create_kafka_topic(topic_name, partitions, replicas) self.create_kafka_user(user_name) self.create_producer_pod(num_of_producer_pods, value) self.create_consumer_pod(num_of_consumer_pods, value) self.messaging = True def setup_amq_cluster( self, sc_name, namespace=constants.AMQ_NAMESPACE, size=100, replicas=3 ): """ Creates amq cluster with persistent storage. 
Args: sc_name (str): Name of sc namespace (str): Namespace for amq cluster size (int): Size of the storage replicas (int): Number of kafka and zookeeper pods to be created """ if storagecluster_independent_check(): sc_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD self.setup_amq_cluster_operator(namespace) self.setup_amq_kafka_persistent(sc_name, size, replicas) self.setup_amq_kafka_connect() self.setup_amq_kafka_bridge() self.amq_is_setup = True return self def create_kafkadrop(self, wait=True): """ Create kafkadrop pod, service and routes Args: wait (bool): If true waits till kafkadrop pod running Return: tuple: Contains objects of kafkadrop pod, service and route """ # Create kafkadrop pod try: kafkadrop = list( templating.load_yaml(constants.KAFKADROP_YAML, multi_document=True) ) self.kafkadrop_pod = OCS(**kafkadrop[0]) self.kafkadrop_svc = OCS(**kafkadrop[1]) self.kafkadrop_route = OCS(**kafkadrop[2]) self.kafkadrop_pod.create() self.kafkadrop_svc.create() self.kafkadrop_route.create() except (CommandFailed, CalledProcessError) as cf: log.error("Failed during creation of kafkadrop which kafka UI") raise cf # Validate kafkadrop pod running if wait: ocp_obj = OCP(kind=constants.POD, namespace=constants.AMQ_NAMESPACE) ocp_obj.wait_for_resource( condition=constants.STATUS_RUNNING, selector="app=kafdrop", timeout=120, sleep=5, ) return self.kafkadrop_pod, self.kafkadrop_svc, self.kafkadrop_route def cleanup( self, kafka_namespace=constants.AMQ_NAMESPACE, tiller_namespace=AMQ_BENCHMARK_NAMESPACE, ): """ Clean up function, will start to delete from amq cluster operator then amq-connector, persistent, bridge, at the end it will delete the created namespace Args: kafka_namespace (str): Created namespace for amq tiller_namespace (str): Created namespace for benchmark """ if self.consumer_pod: self.consumer_pod.delete() if self.producer_pod: self.producer_pod.delete() if self.kafka_user: self.kafka_user.delete() if self.kafka_topic: self.kafka_topic.delete() if self.benchmark: # Delete the helm app try: purge_cmd = f"linux-amd64/helm delete benchmark --purge --tiller-namespace {tiller_namespace}" run(purge_cmd, shell=True, cwd=self.dir, check=True) except (CommandFailed, CalledProcessError) as cf: log.error("Failed to delete help app") raise cf # Delete the pods and namespace created self.sa_tiller.delete() self.crb_tiller.delete() run_cmd(f"oc delete project {tiller_namespace}") self.ns_obj.wait_for_delete(resource_name=tiller_namespace) if self.kafka_connect: self.kafka_connect.delete() if self.kafka_bridge: self.kafka_bridge.delete() if self.kafka_persistent: self.kafka_persistent.delete() log.info("Waiting for 20 seconds to delete persistent") time.sleep(20) ocs_pvc_obj = get_all_pvc_objs(namespace=kafka_namespace) if ocs_pvc_obj: delete_pvcs(ocs_pvc_obj) for pvc in ocs_pvc_obj: logging.info(pvc.name) validate_pv_delete(pvc.backed_pv) if self.crd_objects: for adm_obj in self.crd_objects: adm_obj.delete() time.sleep(20) # Reset namespace to default switch_to_default_rook_cluster_project() run_cmd(f"oc delete project {kafka_namespace}") self.ns_obj.wait_for_delete(resource_name=kafka_namespace, timeout=90)
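# Illustrative end-to-end sketch (not part of the module): cluster setup, an Open
# Messaging workload, a benchmark run and cleanup. The storage-class name and the
# Google-sheet name are placeholders; sizes and replica counts use the defaults of
# the methods above.
def _example_amq_end_to_end():
    amq = AMQ()
    amq.setup_amq_cluster(sc_name="ocs-storagecluster-ceph-rbd")  # placeholder SC name
    amq.create_messaging_on_amq(value="10000")
    amq.validate_messages_are_produced(value="10000")
    amq.validate_messages_are_consumed(value="10000")
    result = amq.run_amq_benchmark(run_in_bg=False)
    workload = templating.load_yaml(constants.AMQ_WORKLOAD_YAML)
    amq_output = amq.validate_amq_benchmark(result, amq_workload_yaml=workload)
    if amq_output:
        amq.export_amq_output_to_gsheet(amq_output, sheet_name="AMQ", sheet_index=0)  # placeholder sheet
    amq.cleanup()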
def test_noobaa_rebuild(self, bucket_factory): """ Test case to verify noobaa rebuild. Verifies KCS: https://access.redhat.com/solutions/5948631 1. Stop the noobaa-operator by setting the replicas of noobaa-operator deployment to 0. 2. Delete the noobaa deployments/statefulsets. 3. Delete the PVC db-noobaa-db-0. 4. Patch existing backingstores and bucketclasses to remove finalizer 5. Delete the backingstores/bucketclass. 6. Delete the noobaa secrets. 7. Restart noobaa-operator by setting the replicas back to 1. 8. Monitor the pods in openshift-storage for noobaa pods to be Running. """ dep_ocp = OCP(kind=constants.DEPLOYMENT, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE) state_ocp = OCP(kind=constants.STATEFULSET, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE) noobaa_pvc_obj = get_pvc_objs(pvc_names=["db-noobaa-db-pg-0"]) # Scale down noobaa operator logger.info( f"Scaling down {constants.NOOBAA_OPERATOR_DEPLOYMENT} deployment to replica: 0" ) dep_ocp.exec_oc_cmd( f"scale deployment {constants.NOOBAA_OPERATOR_DEPLOYMENT} --replicas=0" ) # Delete noobaa deployments and statefulsets logger.info("Deleting noobaa deployments and statefulsets") dep_ocp.delete(resource_name=constants.NOOBAA_ENDPOINT_DEPLOYMENT) state_ocp.delete(resource_name=constants.NOOBAA_DB_STATEFULSET) state_ocp.delete(resource_name=constants.NOOBAA_CORE_STATEFULSET) # Delete noobaa-db pvc pvc_obj = OCP(kind=constants.PVC, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE) logger.info("Deleting noobaa-db pvc") pvc_obj.delete(resource_name=noobaa_pvc_obj[0].name, wait=True) pvc_obj.wait_for_delete(resource_name=noobaa_pvc_obj[0].name, timeout=300) # Patch and delete existing backingstores params = '{"metadata": {"finalizers":null}}' bs_obj = OCP(kind=constants.BACKINGSTORE, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE) for bs in bs_obj.get()["items"]: assert bs_obj.patch( resource_name=bs["metadata"]["name"], params=params, format_type="merge", ), "Failed to change the parameter in backingstore" logger.info(f"Deleting backingstore: {bs['metadata']['name']}") bs_obj.delete(resource_name=bs["metadata"]["name"]) # Patch and delete existing bucketclass bc_obj = OCP(kind=constants.BUCKETCLASS, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE) for bc in bc_obj.get()["items"]: assert bc_obj.patch( resource_name=bc["metadata"]["name"], params=params, format_type="merge", ), "Failed to change the parameter in bucketclass" logger.info(f"Deleting bucketclass: {bc['metadata']['name']}") bc_obj.delete(resource_name=bc["metadata"]["name"]) # Delete noobaa secrets logger.info("Deleting noobaa related secrets") dep_ocp.exec_oc_cmd( "delete secrets noobaa-admin noobaa-endpoints noobaa-operator noobaa-server" ) # Scale back noobaa-operator deployment logger.info( f"Scaling back {constants.NOOBAA_OPERATOR_DEPLOYMENT} deployment to replica: 1" ) dep_ocp.exec_oc_cmd( f"scale deployment {constants.NOOBAA_OPERATOR_DEPLOYMENT} --replicas=1" ) # Wait and validate noobaa PVC is in bound state pvc_obj.wait_for_resource( condition=constants.STATUS_BOUND, resource_name=noobaa_pvc_obj[0].name, timeout=600, sleep=120, ) # Validate noobaa pods are up and running pod_obj = OCP(kind=constants.POD, namespace=defaults.ROOK_CLUSTER_NAMESPACE) noobaa_pods = get_noobaa_pods() pod_obj.wait_for_resource( condition=constants.STATUS_RUNNING, resource_count=len(noobaa_pods), selector=constants.NOOBAA_APP_LABEL, timeout=900, ) # Verify everything running fine logger.info( "Verifying all resources are Running and matches expected result") 
self.sanity_helpers.health_check(tries=120) # Verify default backingstore/bucketclass default_bs = OCP(kind=constants.BACKINGSTORE, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE).get( resource_name=DEFAULT_NOOBAA_BACKINGSTORE) default_bc = OCP(kind=constants.BUCKETCLASS, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE).get( resource_name=DEFAULT_NOOBAA_BUCKETCLASS) assert (default_bs["status"]["phase"] == default_bc["status"]["phase"] == constants.STATUS_READY ), "Failed: Default bs/bc are not in ready state" # Create OBCs logger.info("Creating OBCs after noobaa rebuild") bucket_factory(amount=3, interface="OC", verify_health=True)
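# Illustrative note (not part of the test): the finalizer-removal patch applied to
# each backingstore/bucketclass above is equivalent to the following oc command;
# the resource name is a placeholder.
#
#   oc -n openshift-storage patch backingstore <backingstore-name> \
#       --type merge -p '{"metadata": {"finalizers": null}}'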
class ElasticSearch(object): """ ElasticSearch Environment """ def __init__(self): """ Initializer function """ log.info('Initializing the Elastic-Search environment object') self.namespace = "elastic-system" self.eck_path = "https://download.elastic.co/downloads/eck/1.1.2" self.eck_file = "all-in-one.yaml" self.pvc = "ocs_ci/templates/app-pods/es-pvc.yaml" self.crd = "ocs_ci/templates/app-pods/esq.yaml" self.lspid = None # Creating some different types of OCP objects self.ocp = OCP( kind="pod", resource_name="elastic-operator-0", namespace=self.namespace ) self.ns_obj = OCP(kind='namespace', namespace=self.namespace) self.es = OCP( resource_name="quickstart-es-http", namespace=self.namespace ) self.elasticsearch = OCP(namespace=self.namespace, kind='elasticsearch') self.password = OCP( kind='secret', resource_name='quickstart-es-elastic-user', namespace=self.namespace ) # Fetch the all-in-one.yaml from the official repository self._get_eck_file() # Deploy the ECK all-in-one.yaml file self._deploy_eck() # Deploy the Elastic-Search server self._deploy_es() # Verify that ES is Up & Running timeout = 600 while timeout > 0: if self.get_health(): log.info('The ElasticSearch server is ready !') break else: log.warning('The ElasticSearch server is not ready yet') log.info('going to sleep gor 30 sec. before next check') time.sleep(30) timeout -= 30 # Starting LocalServer process - port forwarding self.local_server() def _get_eck_file(self): """ Getting the ECK file from the official Elasticsearch web site and store it as a temporary file. Current version is 1.1.2, this need to be update with new versions, after testing it, and also it may need to update the CRD file (esq.yaml) with the new version as well. """ self.dir = tempfile.mkdtemp(prefix='elastic-system_') src_file = f'{self.eck_path}/{self.eck_file}' trg_file = f'{self.dir}/{self.eck_file}' log.info(f'Retrieving the ECK CR file from {src_file} into {trg_file}') try: urllib.request.urlretrieve(src_file, trg_file) except urllib.error.HTTPError as e: log.error(f'Can not connect to {src_file} : {e}') raise e def _deploy_eck(self): """ Deploying the ECK environment for the Elasticsearch, and make sure it is in Running mode """ log.info('Deploying the ECK environment for the ES cluster') self.ocp.apply(f'{self.dir}/{self.eck_file}') for es_pod in TimeoutSampler( 300, 10, get_pod_name_by_pattern, 'elastic-operator', self.namespace ): try: if es_pod[0] is not None: self.eckpod = es_pod[0] log.info(f'The ECK pod {self.eckpod} is ready !') break except IndexError: log.info('ECK operator pod not ready yet') def get_ip(self): """ This function return the IP address of the Elasticsearch cluster. this IP is to use inside the OCP cluster Return str : String that represent the Ip Address. """ return self.es.get()["spec"]["clusterIP"] def get_port(self): """ This function return the port of the Elasticsearch cluster. Return str : String that represent the port. 
""" return self.es.get()["spec"]["ports"][0]["port"] def _deploy_es(self): log.info('Deploy the PVC for the ElasticSearch cluster') self.ocp.apply(self.pvc) log.info('Deploy the ElasticSearch cluster') self.ocp.apply(self.crd) for es_pod in TimeoutSampler( 300, 20, get_pod_name_by_pattern, 'quickstart-es-default', self.namespace ): try: if es_pod[0] is not None: self.espod = es_pod[0] log.info(f'The ElasticSearch pod {self.espod} Started') break except IndexError: log.info('elasticsearch pod not ready yet') es_pod = OCP(kind='pod', namespace=self.namespace) log.info('Waiting for ElasticSearch to Run') assert es_pod.wait_for_resource( condition=constants.STATUS_RUNNING, resource_name=self.espod, sleep=30, timeout=600 ) log.info('Elastic Search is ready !!!') def get_health(self): """ This method return the health status of the Elasticsearch. Returns: bool : True if the status is green (OK) otherwise - False """ return self.elasticsearch.get()['items'][0]['status']['health'] == 'green' def get_password(self): """ This method return the password used to connect the Elasticsearch. Returns: str : The password as text """ return base64.b64decode(self.password.get()['data']['elastic']).decode('utf-8') def cleanup(self): """ Cleanup the environment from all Elasticsearch components, and from the port forwarding process. """ log.info('Teardown the Elasticsearch environment') log.info(f'Killing the local server process ({self.lspid})') os.kill(self.lspid, signal.SIGKILL) log.info('Deleting all resources') subprocess.run(f'oc delete -f {self.crd}', shell=True) subprocess.run(f'oc delete -f {self.eck_file}', shell=True, cwd=self.dir) self.ns_obj.wait_for_delete(resource_name=self.namespace) def local_server(self): """ Starting sub-process that will do port-forwarding, to allow access from outside the open-shift cluster into the Elasticsearch server. """ cmd = f'oc -n {self.namespace } ' cmd += f'port-forward service/quickstart-es-http {self.get_port()}' log.info(f'Going to run : {cmd}') proc = subprocess.Popen(cmd, shell=True) log.info(f'Starting LocalServer with PID of {proc.pid}') self.lspid = proc.pid
class RipSaw(object): """ Workload operation using RipSaw """ def __init__(self, **kwargs): """ Initializer function Args: kwargs (dict): Following kwargs are valid repo: Ripsaw repo to be used - a github link branch: branch to use from the repo namespace: namespace for the operator Example Usage: r1 = RipSaw() r1.apply_crd(crd='ripsaw_v1alpha1_ripsaw_crd.yaml') # use oc apply to apply custom modified bench my_custom_bench = my_custom_bench.yaml run_cmd('oc apply -f my_custom_bench') """ self.args = kwargs self.repo = self.args.get('repo', 'https://github.com/cloud-bulldozer/ripsaw') self.branch = self.args.get('branch', 'master') self.namespace = self.args.get('namespace', RIPSAW_NAMESPACE) self.pgsql_is_setup = False self.ocp = OCP() self.ns_obj = OCP(kind='namespace') self.pod_obj = OCP(namespace=RIPSAW_NAMESPACE, kind='pod') self._create_namespace() self._clone_ripsaw() def _create_namespace(self): """ create namespace for RipSaw """ self.ocp.new_project(self.namespace) def _clone_ripsaw(self): """ clone the ripsaw repo """ self.dir = tempfile.mkdtemp(prefix='ripsaw_') try: log.info(f'cloning ripsaw in {self.dir}') git_clone_cmd = f'git clone -b {self.branch} {self.repo} ' run(git_clone_cmd, shell=True, cwd=self.dir, check=True) self.crd = 'resources/crds/' self.operator = 'resources/operator.yaml' except (CommandFailed, CalledProcessError) as cf: log.error('Error during cloning of ripsaw repository') raise cf def apply_crd(self, crd): """ Apply the CRD Args: crd (str): Name of file to apply """ self.dir += '/ripsaw' run('oc apply -f deploy', shell=True, check=True, cwd=self.dir) run(f'oc apply -f {crd}', shell=True, check=True, cwd=self.dir) run(f'oc apply -f {self.operator}', shell=True, check=True, cwd=self.dir) def get_uuid(self, benchmark): """ Get the UUID of the test. When ripsaw is used for running benchmark tests, each run gets its own UUID, so the results in the elastic-search server can be sorted. Args: benchmark (str): the name of the main pod in the test Return: str: the UUID of the test """ count = 0 output = None while count <= 5: try: output = self.pod_obj.exec_oc_cmd(f'exec {benchmark} -- env') break except CommandFailed: time.sleep(3) count += 1 uuid = '' if output: for line in output.split(): if 'uuid=' in line: uuid = line.split('=')[1] break log.info(f'The UUID of the test is : {uuid}') else: log.error(f'Can not get the UUID from {benchmark}') return uuid def cleanup(self): run(f'oc delete -f {self.crd}', shell=True, cwd=self.dir) run(f'oc delete -f {self.operator}', shell=True, cwd=self.dir) run('oc delete -f deploy', shell=True, cwd=self.dir) run_cmd(f'oc delete project {self.namespace}') self.ns_obj.wait_for_delete(resource_name=self.namespace) # Reset namespace to default switch_to_default_rook_cluster_project()
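# Minimal usage sketch (illustrative only, never called from this module), based on
# the class docstring above. The CRD path is relative to the cloned ripsaw repo; the
# benchmark pod name passed to get_uuid() is hypothetical and depends on the
# benchmark CR you apply yourself.
def _example_ripsaw_usage():
    ripsaw = RipSaw()
    ripsaw.apply_crd(crd='resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml')
    # apply a custom benchmark CR, e.g. run_cmd('oc apply -f my_custom_bench.yaml'),
    # wait for its pod, then collect the run UUID:
    uuid = ripsaw.get_uuid('example-benchmark-pod')
    log.info(f'benchmark run uuid: {uuid}')
    ripsaw.cleanup()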
class PillowFight(object): """ Workload operation using PillowFight This class was modelled after the RipSaw class in this directory. """ WAIT_FOR_TIME = 600 MIN_ACCEPTABLE_OPS_PER_SEC = 1000 MAX_ACCEPTABLE_RESPONSE_TIME = 1000 def __init__(self, **kwargs): """ Initializer function Args: kwargs (dict): Following kwargs are valid repo: PillowFight repo to be used - a github link branch: branch to use from the repo namespace: namespace for the operator Example Usage: r1 = PillowFight() r1.run_pillowfights() # To run a private yaml my_custom_bench = my_custom_bench.yaml run_cmd('oc apply -f my_custom_bench') # To get pillowfight data from log file data = r1.extract_data(log_file) # To do basic sanity checking of data r1.sanity_check(data) """ self.args = kwargs self.namespace = self.args.get( 'namespace', 'couchbase-operator-namespace') self.ocp = OCP() self.ns_obj = OCP(kind='namespace') self.pod_obj = OCP(kind='pod') self.up_check = OCP(namespace=constants.COUCHBASE_OPERATOR) self.logs = tempfile.mkdtemp(prefix='pf_logs_') def run_pillowfights(self, replicas=1, num_items=None, num_threads=None): """ Loop through all the yaml files extracted from the pillowfight repo and run them. Run oc logs on the results and save the logs in the self.logs directory Args: replicas (int): Number of pod replicas num_items (int): Number of items to be loaded to the cluster num_threads (int): Number of threads """ ocp_local = OCP(namespace=self.namespace) pf_files = listdir(constants.TEMPLATE_PILLOWFIGHT_DIR) for i in range(replicas): for pf_yaml in pf_files: pf_fullpath = join(constants.TEMPLATE_PILLOWFIGHT_DIR, pf_yaml) if not pf_fullpath.endswith('.yaml'): continue if not isfile(pf_fullpath): continue # for basic-pillowfight.yaml pfight = templating.load_yaml(pf_fullpath) pfight['metadata']['name'] = 'pillowfight-rbd-simple' + f"{i}" # num of items pfight['spec']['template']['spec']['containers'][0]['command'][4] = str( num_items) if num_items else '20000' # num of threads pfight['spec']['template']['spec']['containers'][0]['command'][13] = str( num_threads) if num_threads else '20' lpillowfight = OCS(**pfight) lpillowfight.create() pods_info = {} for pillowfight_pods in TimeoutSampler( self.WAIT_FOR_TIME, 3, get_pod_name_by_pattern, 'pillowfight', constants.COUCHBASE_OPERATOR ): try: if len(pillowfight_pods) == replicas: counter = 0 for pf_pod in pillowfight_pods: pod_info = self.up_check.exec_oc_cmd( f"get pods {pf_pod} -o json" ) pf_status = pod_info['status']['containerStatuses'][0]['state'] if 'terminated' in pf_status: pf_completion_info = pf_status['terminated']['reason'] counter += 1 pods_info.update({pf_pod: pf_completion_info}) if counter == replicas: break except IndexError: log.info("Pillowfight not yet completed") logging.info(pods_info) pf_yaml = pf_files[0] # for basic-pillowfight.yaml for pod, pf_completion_info in pods_info.items(): if pf_completion_info == 'Completed': pf_endlog = f'{pod}.log' pf_log = join(self.logs, pf_endlog) data_from_log = ocp_local.exec_oc_cmd( f"logs -f {pod} --ignore-errors", out_yaml_format=False ) data_from_log = data_from_log.replace('\x00', '') with open(pf_log, 'w') as fd: fd.write(data_from_log) elif pf_completion_info == 'Error': raise Exception( f"Pillowfight {pf_yaml} failed to complete" ) def analyze_all(self): """ Analyze the data extracted into self.logs files """ for path in listdir(self.logs): full_path = join(self.logs, path) logging.info(f'Analyzing {full_path}') with open(full_path, 'r') as fdesc: data_from_log = fdesc.read() log_data = 
self.parse_pillowfight_log(data_from_log) self.sanity_check(log_data) def sanity_check(self, stats): """ Make sure the worst cases for ops per second and response times are within an acceptable range. """ stat1 = min(stats['opspersec']) if stat1 < self.MIN_ACCEPTABLE_OPS_PER_SEC: raise Exception( f"Worst OPS/SEC value reported is {stat1}" ) stat2 = max(stats['resptimes'].keys()) / 1000 if stat2 > self.MAX_ACCEPTABLE_RESPONSE_TIME: raise Exception( f"Worst response time reported is {stat2} milliseconds" ) def parse_pillowfight_log(self, data_from_log): """ Parse the log data from a pillowfight pod. Clean up the output from oc logs to handle peculiarities in the couchbase log results, and generate a summary of the results. The dictionary returned has two values; 'opspersec' and 'resptimes'. opspersec is a list of the ops per second numbers reported. resptimes is a dictionary indexed by the max response time of a range. Each entry in resptimes contains a minimum response time for that range, and a count of how many messages fall within that range. Args: data_from_log (str): log data Returns: dict: ops per sec and response time information """ # The data in the couchbase logs is kind of abnormal. # It contains histograms with invalid unicode characters for yaml # output (which is why out_yaml_format=False is used). # It also seems to write a block of text inside another block at # an unpredictable location; such lines are skipped by the # ValueError handling below. # # So what's left is a list of OPS/SEC values and a histogram of # response times. This routine organizes that data. ops_per_sec = [] resp_hist = {} log.info( "*******Couchbase raw output log*********\n" f"{data_from_log}" ) lines = data_from_log.split("\n") for dline in lines: try: if dline.startswith("OPS/SEC"): dfields = dline.split(" ") dnumb = int(dfields[-1].strip()) ops_per_sec.append(dnumb) if re.match('^\\[\\d+ +- \\d+ *\\][um]s \\|#* - \\d+', dline): for element in ["[", "]", "|", "-", "#"]: dline = dline.replace(element, " ") parts = dline.split() i1 = int(parts[0]) i2 = int(parts[1]) if parts[2] == 'ms': i1 *= 1000 i2 *= 1000 resp_hist[i2] = {'minindx': i1, 'number': int(parts[3])} except ValueError: log.info(f"{dline} -- contains invalid data") ret_data = {'opspersec': ops_per_sec, 'resptimes': resp_hist} return ret_data def cleanup(self): """ Remove pillowfight pods and temp files """ rmtree(self.logs) nsinfo = self.pod_obj.exec_oc_cmd(command="get namespace") if constants.COUCHBASE_OPERATOR in nsinfo: self.pod_obj.exec_oc_cmd( command=f"delete namespace {constants.COUCHBASE_OPERATOR}" ) self.ns_obj.wait_for_delete(resource_name=constants.COUCHBASE_OPERATOR)
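# Minimal usage sketch (illustrative only, never called from this module): run the
# pillowfight workloads shipped in TEMPLATE_PILLOWFIGHT_DIR, parse and sanity-check
# the results, then clean up. It assumes a Couchbase cluster already exists in the
# constants.COUCHBASE_OPERATOR namespace.
def _example_pillowfight_usage():
    pf = PillowFight()
    pf.run_pillowfights(replicas=1, num_items=20000, num_threads=20)
    pf.analyze_all()  # raises if OPS/SEC or response-time thresholds are violated
    pf.cleanup()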
class AMQ(object): """ Workload operation using AMQ """ def __init__(self, **kwargs): """ Initializer function Args: kwargs (dict): Following kwargs are valid namespace: namespace for the operator repo: AMQ repo where all necessary yaml files are - a github link branch: branch to use from the repo """ self.args = kwargs self.repo = self.args.get('repo', constants.KAFKA_OPERATOR) self.branch = self.args.get('branch', 'master') self.ocp = OCP() self.ns_obj = OCP(kind='namespace') self.pod_obj = OCP(kind='pod') self.kafka_obj = OCP(kind='Kafka') self.kafka_connect_obj = OCP(kind="KafkaConnect") self.kafka_bridge_obj = OCP(kind="KafkaBridge") self.kafka_topic_obj = OCP(kind="KafkaTopic") self.kafka_user_obj = OCP(kind="KafkaUser") self.amq_is_setup = False self.messaging = False self.benchmark = False self.dir = tempfile.mkdtemp(prefix='amq_') self._clone_amq() def _clone_amq(self): """ clone the amq repo """ try: log.info(f'cloning amq in {self.dir}') git_clone_cmd = f'git clone -b {self.branch} {self.repo} ' run(git_clone_cmd, shell=True, cwd=self.dir, check=True) self.amq_dir = "strimzi-kafka-operator/install/cluster-operator/" self.amq_kafka_pers_yaml = "strimzi-kafka-operator/examples/kafka/kafka-persistent.yaml" self.amq_kafka_connect_yaml = "strimzi-kafka-operator/examples/connect/kafka-connect.yaml" self.amq_kafka_bridge_yaml = "strimzi-kafka-operator/examples/bridge/kafka-bridge.yaml" self.kafka_topic_yaml = "strimzi-kafka-operator/examples/topic/kafka-topic.yaml" self.kafka_user_yaml = "strimzi-kafka-operator/examples/user/kafka-user.yaml" self.hello_world_producer_yaml = constants.HELLO_WORLD_PRODUCER_YAML self.hello_world_consumer_yaml = constants.HELLO_WORLD_CONSUMER_YAML except (CommandFailed, CalledProcessError) as cf: log.error('Error during cloning of amq repository') raise cf def create_namespace(self, namespace): """ create namespace for amq Args: namespace (str): Namespace for amq pods """ self.ocp.new_project(namespace) def setup_amq_cluster_operator(self, namespace=constants.AMQ_NAMESPACE): """ Function to setup amq-cluster-operator; the files are pulled from github and it makes sure the cluster-operator pod is running Args: namespace (str): Namespace for AMQ pods """ # Namespace for amq try: self.create_namespace(namespace) except CommandFailed as ef: if f'project.project.openshift.io "{namespace}" already exists' not in str( ef): raise ef # Create strimzi-cluster-operator pod run( f"for i in `(ls strimzi-kafka-operator/install/cluster-operator/)`;" f"do sed 's/{namespace}/myproject/g' strimzi-kafka-operator/install/cluster-operator/$i;done", shell=True, check=True, cwd=self.dir) run(f'oc apply -f {self.amq_dir} -n {namespace}', shell=True, check=True, cwd=self.dir) time.sleep(10) # Check strimzi-cluster-operator pod created if self.is_amq_pod_running(pod_pattern="cluster-operator", expected_pods=1): log.info("strimzi-cluster-operator pod is in running state") else: raise ResourceWrongStatusException( "strimzi-cluster-operator pod is not getting to running state") def is_amq_pod_running(self, pod_pattern, expected_pods, namespace=constants.AMQ_NAMESPACE): """ The function checks if the provided pod_pattern finds a pod and whether its status is running or not Args: pod_pattern (str): the pattern for pod expected_pods (int): Number of pods namespace (str): Namespace for amq pods Returns: bool: status of pod: True if found pod is running """ _rc = True for pod in TimeoutSampler(300, 10, get_pod_name_by_pattern, pod_pattern, namespace): try: if pod is not None and len(pod) == 
expected_pods: amq_pod = pod break except IndexError as ie: log.error("pod not ready yet") raise ie # checking pod status for pod in amq_pod: if (self.pod_obj.wait_for_resource( condition='Running', resource_name=pod, timeout=1600, sleep=30, )): log.info(f"{pod} pod is up and running") else: _rc = False log.error(f"{pod} pod is not running") return _rc def setup_amq_kafka_persistent(self, sc_name, size=100, replicas=3): """ Function to setup amq-kafka-persistent; the file is pulled from github, it creates kind: Kafka and makes sure the status is running Args: sc_name (str): Name of sc size (int): Size of the storage in Gi replicas (int): Number of kafka and zookeeper pods to be created return : kafka_persistent """ try: kafka_persistent = templating.load_yaml( os.path.join(self.dir, self.amq_kafka_pers_yaml)) kafka_persistent['spec']['kafka']['replicas'] = replicas kafka_persistent['spec']['kafka']['storage']['volumes'][0][ 'class'] = sc_name kafka_persistent['spec']['kafka']['storage']['volumes'][0][ 'size'] = f"{size}Gi" kafka_persistent['spec']['zookeeper']['replicas'] = replicas kafka_persistent['spec']['zookeeper']['storage']['class'] = sc_name kafka_persistent['spec']['zookeeper']['storage'][ 'size'] = f"{size}Gi" self.kafka_persistent = OCS(**kafka_persistent) self.kafka_persistent.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during setup of AMQ Kafka-persistent') raise cf time.sleep(40) if self.is_amq_pod_running(pod_pattern="my-cluster", expected_pods=(replicas * 2) + 1): return self.kafka_persistent else: raise ResourceWrongStatusException( "my-cluster-kafka and my-cluster-zookeeper " "pods are not getting to running state") def setup_amq_kafka_connect(self): """ The function is to setup amq-kafka-connect; the yaml file is pulled from github, it creates kind: KafkaConnect and makes sure the status is running Returns: kafka_connect object """ try: kafka_connect = templating.load_yaml( os.path.join(self.dir, self.amq_kafka_connect_yaml)) self.kafka_connect = OCS(**kafka_connect) self.kafka_connect.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during setup of AMQ KafkaConnect') raise cf if self.is_amq_pod_running(pod_pattern="my-connect-cluster-connect", expected_pods=1): return self.kafka_connect else: raise ResourceWrongStatusException( "my-connect-cluster-connect pod is not getting to running state" ) def setup_amq_kafka_bridge(self): """ Function to setup amq-kafka-bridge; the file is pulled from github, it creates kind: KafkaBridge and makes sure the pod status is running Return: kafka_bridge object """ try: kafka_bridge = templating.load_yaml( os.path.join(self.dir, self.amq_kafka_bridge_yaml)) self.kafka_bridge = OCS(**kafka_bridge) self.kafka_bridge.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during setup of AMQ KafkaBridge') raise cf # Making sure the kafka_bridge is running if self.is_amq_pod_running(pod_pattern="my-bridge-bridge", expected_pods=1): return self.kafka_bridge else: raise ResourceWrongStatusException( "kafka_bridge_pod pod is not getting to running state") def create_kafka_topic(self, name='my-topic', partitions=1, replicas=1): """ Creates kafka topic Args: name (str): Name of the kafka topic partitions (int): Number of partitions replicas (int): Number of replicas Return: kafka_topic object """ try: kafka_topic = templating.load_yaml( os.path.join(self.dir, self.kafka_topic_yaml)) kafka_topic["metadata"]["name"] = name 
kafka_topic["spec"]["partitions"] = partitions kafka_topic["spec"]["replicas"] = replicas self.kafka_topic = OCS(**kafka_topic) self.kafka_topic.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during creating of Kafka topic') raise cf # Making sure kafka topic created if self.kafka_topic_obj.get(resource_name=name): return self.kafka_topic else: raise ResourceWrongStatusException("kafka topic is not created") def create_kafka_user(self, name="my-user"): """ Creates kafka user Args: name (str): Name of the kafka user Return: kafka_user object """ try: kafka_user = templating.load_yaml( os.path.join(self.dir, self.kafka_user_yaml)) kafka_user["metadata"]["name"] = name self.kafka_user = OCS(**kafka_user) self.kafka_user.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during creating of Kafka user') raise cf # Making sure kafka user created if self.kafka_user_obj.get(resource_name=name): return self.kafka_user else: raise ResourceWrongStatusException("kafka user is not created") def create_producer_pod(self, num_of_pods=1, value='10000'): """ Creates producer pods Args: num_of_pods (int): Number of producer pods to be created value (str): Number of the messages to be sent Returns: producer pod object """ try: producer_pod = templating.load_yaml( constants.HELLO_WORLD_PRODUCER_YAML) producer_pod["spec"]["replicas"] = num_of_pods producer_pod["spec"]["template"]["spec"]["containers"][0]["env"][ 4]["value"] = value self.producer_pod = OCS(**producer_pod) self.producer_pod.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during creation of producer pod') raise cf # Making sure the producer pod is running if self.is_amq_pod_running(pod_pattern="hello-world-producer", expected_pods=num_of_pods): return self.producer_pod else: raise ResourceWrongStatusException( "producer pod is not getting to running state") def create_consumer_pod(self, num_of_pods=1, value='10000'): """ Creates producer pods Args: num_of_pods (int): Number of consumer pods to be created value (str): Number of messages to be received Returns: consumer pod object """ try: consumer_pod = templating.load_yaml( constants.HELLO_WORLD_CONSUMER_YAML) consumer_pod["spec"]["replicas"] = num_of_pods consumer_pod["spec"]["template"]["spec"]["containers"][0]["env"][ 4]["value"] = value self.consumer_pod = OCS(**consumer_pod) self.consumer_pod.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during creation of consumer pod') raise cf # Making sure the producer pod is running if self.is_amq_pod_running(pod_pattern="hello-world-consumer", expected_pods=num_of_pods): return self.consumer_pod else: raise ResourceWrongStatusException( "consumer pod is not getting to running state") def validate_msg(self, pod, namespace=constants.AMQ_NAMESPACE, value='10000', since_time=1800): """ Validate if messages are sent or received Args: pod (str): Name of the pod namespace (str): Namespace of the pod value (str): Number of messages are sent since_time (int): Number of seconds to required to sent the msg Returns: bool : True if all messages are sent/received """ cmd = f"oc logs -n {namespace} {pod} --since={since_time}s" msg = run_cmd(cmd) if msg.find(f"Hello world - {int(value) - 1} ") is -1: return False else: return True def validate_messages_are_produced(self, namespace=constants.AMQ_NAMESPACE, value='10000', since_time=1800): """ Validates if all messages are sent in producer pod Args: namespace (str): Namespace of the pod value (str): Number 
of messages sent since_time (int): Number of seconds required to send the msg Raises exception on failures """ # ToDo: Support multiple topics and users producer_pod_objs = [ get_pod_obj(pod) for pod in get_pod_name_by_pattern( 'hello-world-produce', namespace) ] for pod in producer_pod_objs: for msg in TimeoutSampler(900, 30, self.validate_msg, pod.name, namespace, value, since_time): if msg: break log.error("Some messages were not sent") raise Exception("Not all messages were sent from the producer pod") def validate_messages_are_consumed(self, namespace=constants.AMQ_NAMESPACE, value='10000', since_time=1800): """ Validates if all messages are received in consumer pod Args: namespace (str): Namespace of the pod value (str): Number of messages received since_time (int): Number of seconds required to receive the msg Raises exception on failures """ # ToDo: Support multiple topics and users consumer_pod_objs = [ get_pod_obj(pod) for pod in get_pod_name_by_pattern( 'hello-world-consumer', namespace) ] for pod in consumer_pod_objs: for msg in TimeoutSampler(900, 30, self.validate_msg, pod.name, namespace, value, since_time): if msg: log.info( "Consumer pod received all messages sent by producer") break log.error("Some messages were not received") raise Exception("Consumer pod did not receive all messages sent by the producer") def run_in_bg(self, namespace=constants.AMQ_NAMESPACE, value='10000', since_time=1800): """ Validate messages are produced and consumed in bg Args: namespace (str): Namespace of the pod value (str): Number of messages to be sent and received since_time (int): Number of seconds required to send and receive msg """ # Todo: Check each message sent and received log.info("Running open messages on pod in bg") threads = [] thread1 = Thread(target=self.validate_messages_are_produced, args=(namespace, value, since_time)) thread1.start() time.sleep(10) threads.append(thread1) thread2 = Thread(target=self.validate_messages_are_consumed, args=(namespace, value, since_time)) thread2.start() time.sleep(10) threads.append(thread2) return threads def run_amq_benchmark(self, benchmark_pod_name="benchmark", kafka_namespace=constants.AMQ_NAMESPACE, tiller_namespace=AMQ_BENCHMARK_NAMESPACE, num_of_clients=8, worker=None, timeout=1800, amq_workload_yaml=None, run_in_bg=False): """ Run benchmark pod and get the results Args: benchmark_pod_name (str): Name of the benchmark pod kafka_namespace (str): Namespace where kafka cluster created tiller_namespace (str): Namespace where tiller pod needs to be created num_of_clients (int): Number of clients to be created worker (str) : Loads to create on workloads separated with commas e.g http://benchmark-worker-0.benchmark-worker:8080, http://benchmark-worker-1.benchmark-worker:8080 timeout (int): Time to complete the run amq_workload_yaml (dict): Contains amq workloads information keys and values :name (str): Name of the workloads :topics (int): Number of topics created :partitions_per_topic (int): Number of partitions per topic :message_size (int): Message size :payload_file (str): Load to run on workload :subscriptions_per_topic (int): Number of subscriptions per topic :consumer_per_subscription (int): Number of consumers per subscription :producers_per_topic (int): Number of producers per topic :producer_rate (int): Producer rate :consumer_backlog_sizegb (int): Size of block in gb :test_duration_minutes (int): Time to run the workloads run_in_bg (bool): On true the workload will run in background Return: result (str/Thread obj): Returns 
benchmark run information if run_in_bg is False. Otherwise a thread of the amq workload execution """ # Namespace for helm/tiller try: self.create_namespace(tiller_namespace) except CommandFailed as ef: if f'project.project.openshift.io "{tiller_namespace}" already exists' not in str( ef): raise ef # Create rbac file try: sa_tiller = list( templating.load_yaml(constants.AMQ_RBAC_YAML, multi_document=True)) sa_tiller[0]["metadata"]["namespace"] = tiller_namespace sa_tiller[1]["subjects"][0]["namespace"] = tiller_namespace self.sa_tiller = OCS(**sa_tiller[0]) self.crb_tiller = OCS(**sa_tiller[1]) self.sa_tiller.create() self.crb_tiller.create() except (CommandFailed, CalledProcessError) as cf: log.error('Failed during creation of service account tiller') raise cf # Install helm cli (version v2.16.1 as we need the tiller component) # And create tiller pods wget_cmd = f"wget -c --read-timeout=5 --tries=0 {URL}" untar_cmd = "tar -zxvf helm-v2.16.1-linux-amd64.tar.gz" tiller_cmd = ( f"linux-amd64/helm init --tiller-namespace {tiller_namespace}" f" --service-account {tiller_namespace}") exec_cmd(cmd=wget_cmd, cwd=self.dir) exec_cmd(cmd=untar_cmd, cwd=self.dir) exec_cmd(cmd=tiller_cmd, cwd=self.dir) # Validate tiller pod is running log.info("Waiting for 30s for tiller pod to come up") time.sleep(30) if self.is_amq_pod_running(pod_pattern="tiller", expected_pods=1, namespace=tiller_namespace): log.info("Tiller pod is running") else: raise ResourceWrongStatusException( "Tiller pod is not in running state") # Create benchmark pods log.info("Create benchmark pods") values = templating.load_yaml(constants.AMQ_BENCHMARK_VALUE_YAML) values["numWorkers"] = num_of_clients benchmark_cmd = ( f"linux-amd64/helm install {constants.AMQ_BENCHMARK_POD_YAML}" f" --name {benchmark_pod_name} --tiller-namespace {tiller_namespace}" ) exec_cmd(cmd=benchmark_cmd, cwd=self.dir) # Making sure the benchmark pod and clients are running if self.is_amq_pod_running(pod_pattern="benchmark", expected_pods=(1 + num_of_clients), namespace=tiller_namespace): log.info("All benchmark pods are up and running") else: raise ResourceWrongStatusException( "Benchmark pod is not getting to running state") # Update commonConfig with kafka-bootstrap server details driver_kafka = templating.load_yaml(constants.AMQ_DRIVER_KAFKA_YAML) driver_kafka['commonConfig'] = ( f'bootstrap.servers=my-cluster-kafka-bootstrap.{kafka_namespace}.svc.cluster.local:9092' ) json_file = f'{self.dir}/driver_kafka' templating.dump_data_to_json(driver_kafka, json_file) cmd = f'cp {json_file} {benchmark_pod_name}-driver:/' self.pod_obj.exec_oc_cmd(cmd) # Update the workload yaml if not amq_workload_yaml: amq_workload_yaml = templating.load_yaml( constants.AMQ_WORKLOAD_YAML) yaml_file = f'{self.dir}/amq_workload.yaml' templating.dump_data_to_temp_yaml(amq_workload_yaml, yaml_file) cmd = f'cp {yaml_file} {benchmark_pod_name}-driver:/' self.pod_obj.exec_oc_cmd(cmd) self.benchmark = True # Run the benchmark if worker: cmd = f"bin/benchmark --drivers /driver_kafka --workers {worker} /amq_workload.yaml" else: cmd = "bin/benchmark --drivers /driver_kafka /amq_workload.yaml" log.info( f"Running benchmark command {cmd} inside the benchmark pod " ) if run_in_bg: executor = ThreadPoolExecutor(1) result = executor.submit(self.run_amq_workload, cmd, benchmark_pod_name, tiller_namespace, timeout) return result pod_obj = get_pod_obj(name=f"{benchmark_pod_name}-driver", namespace=tiller_namespace) result = pod_obj.exec_cmd_on_pod(command=cmd, out_yaml_format=False, 
timeout=timeout) return result def run_amq_workload(self, command, benchmark_pod_name, tiller_namespace, timeout): """ Runs amq workload in bg Args: command (str): Command to run on pod benchmark_pod_name (str): Pod name tiller_namespace (str): Namespace of pod timeout (int): Time to complete the run Returns: result (str): Returns benchmark run information """ pod_obj = get_pod_obj(name=f"{benchmark_pod_name}-driver", namespace=tiller_namespace) return pod_obj.exec_cmd_on_pod(command=command, out_yaml_format=False, timeout=timeout) def validate_amq_benchmark(self, result, amq_workload_yaml, benchmark_pod_name="benchmark"): """ Validates amq benchmark run Args: result (str): Benchmark run information amq_workload_yaml (dict): AMQ workload information benchmark_pod_name (str): Name of the benchmark pod Returns: res_dict (dict): Returns the dict output on success, otherwise None """ res_dict = {} res_dict['topic'] = amq_workload_yaml['topics'] res_dict['partitionsPerTopic'] = amq_workload_yaml[ 'partitionsPerTopic'] res_dict['messageSize'] = amq_workload_yaml['messageSize'] res_dict['payloadFile'] = amq_workload_yaml['payloadFile'] res_dict['subscriptionsPerTopic'] = amq_workload_yaml[ 'subscriptionsPerTopic'] res_dict['producersPerTopic'] = amq_workload_yaml['producersPerTopic'] res_dict['consumerPerSubscription'] = amq_workload_yaml[ 'consumerPerSubscription'] res_dict['producerRate'] = amq_workload_yaml['producerRate'] # Validate amq benchmark is completed workload_json_file = None for part in result.split(): if '.json' in part: workload_json_file = part if workload_json_file: cmd = f'rsync {benchmark_pod_name}-driver:{workload_json_file} {self.dir} -n {AMQ_BENCHMARK_NAMESPACE}' self.pod_obj.exec_oc_cmd(command=cmd, out_yaml_format=False) # Parse the json file with open(f'{self.dir}/{workload_json_file}') as json_file: data = json.load(json_file) res_dict['AvgpublishRate'] = sum(data.get('publishRate')) / len( data.get('publishRate')) res_dict['AvgConsumerRate'] = sum(data.get('consumeRate')) / len( data.get('consumeRate')) res_dict['AvgMsgBacklog'] = sum(data.get('backlog')) / len( data.get('backlog')) res_dict['publishLatencyAvg'] = sum( data.get('publishLatencyAvg')) / len( data.get('publishLatencyAvg')) res_dict['aggregatedPublishLatencyAvg'] = data.get( 'aggregatedPublishLatencyAvg') res_dict['aggregatedPublishLatency50pct'] = data.get( 'aggregatedPublishLatency50pct') res_dict['aggregatedPublishLatency75pct'] = data.get( 'aggregatedPublishLatency75pct') res_dict['aggregatedPublishLatency95pct'] = data.get( 'aggregatedPublishLatency95pct') res_dict['aggregatedPublishLatency99pct'] = data.get( 'aggregatedPublishLatency99pct') res_dict['aggregatedPublishLatency999pct'] = data.get( 'aggregatedPublishLatency999pct') res_dict['aggregatedPublishLatency9999pct'] = data.get( 'aggregatedPublishLatency9999pct') res_dict['aggregatedPublishLatencyMax'] = data.get( 'aggregatedPublishLatencyMax') res_dict['aggregatedEndToEndLatencyAvg'] = data.get( 'aggregatedEndToEndLatencyAvg') res_dict['aggregatedEndToEndLatency50pct'] = data.get( 'aggregatedEndToEndLatency50pct') res_dict['aggregatedEndToEndLatency75pct'] = data.get( 'aggregatedEndToEndLatency75pct') res_dict['aggregatedEndToEndLatency95pct'] = data.get( 'aggregatedEndToEndLatency95pct') res_dict['aggregatedEndToEndLatency99pct'] = data.get( 'aggregatedEndToEndLatency99pct') res_dict['aggregatedEndToEndLatency999pct'] = data.get( 'aggregatedEndToEndLatency999pct') res_dict['aggregatedEndToEndLatency9999pct'] = data.get( 'aggregatedEndToEndLatency9999pct') 
res_dict['aggregatedEndToEndLatencyMax'] = data.get( 'aggregatedEndToEndLatencyMax') else: log.error("Benchmark didn't run completely") return None amq_benchmark_pod_table = PrettyTable(['key', 'value']) for key, val in res_dict.items(): amq_benchmark_pod_table.add_row([key, val]) log.info(f'\n{amq_benchmark_pod_table}\n') return res_dict def create_messaging_on_amq(self, topic_name='my-topic', user_name="my-user", partitions=1, replicas=1, num_of_producer_pods=1, num_of_consumer_pods=1, value='10000'): """ Creates workload using Open Messaging tool on amq cluster Args: topic_name (str): Name of the topic to be created user_name (str): Name of the user to be created partitions (int): Number of partitions of topic replicas (int): Number of replicas of topic num_of_producer_pods (int): Number of producer pods to be created num_of_consumer_pods (int): Number of consumer pods to be created value (str): Number of messages to be sent and received """ self.create_kafka_topic(topic_name, partitions, replicas) self.create_kafka_user(user_name) self.create_producer_pod(num_of_producer_pods, value) self.create_consumer_pod(num_of_consumer_pods, value) self.messaging = True def setup_amq_cluster(self, sc_name, namespace=constants.AMQ_NAMESPACE, size=100, replicas=3): """ Creates amq cluster with persistent storage. Args: sc_name (str): Name of sc namespace (str): Namespace for amq cluster size (int): Size of the storage replicas (int): Number of kafka and zookeeper pods to be created """ self.setup_amq_cluster_operator(namespace) self.setup_amq_kafka_persistent(sc_name, size, replicas) self.setup_amq_kafka_connect() self.setup_amq_kafka_bridge() self.amq_is_setup = True return self def cleanup(self, kafka_namespace=constants.AMQ_NAMESPACE, tiller_namespace=AMQ_BENCHMARK_NAMESPACE): """ Clean up function; it starts deleting from the amq cluster operator, then amq-connector, persistent and bridge, and at the end it deletes the created namespaces Args: kafka_namespace (str): Created namespace for amq tiller_namespace (str): Created namespace for benchmark """ if self.amq_is_setup: if self.messaging: self.consumer_pod.delete() self.producer_pod.delete() self.kafka_user.delete() self.kafka_topic.delete() if self.benchmark: # Delete the helm app try: purge_cmd = f"linux-amd64/helm delete benchmark --purge --tiller-namespace {tiller_namespace}" run(purge_cmd, shell=True, cwd=self.dir, check=True) except (CommandFailed, CalledProcessError) as cf: log.error('Failed to delete helm app') raise cf # Delete the pods and namespace created self.sa_tiller.delete() self.crb_tiller.delete() run_cmd(f'oc delete project {tiller_namespace}') self.ns_obj.wait_for_delete(resource_name=tiller_namespace) self.kafka_persistent.delete() self.kafka_connect.delete() self.kafka_bridge.delete() run(f'oc delete -f {self.amq_dir}', shell=True, check=True, cwd=self.dir) run_cmd(f'oc delete project {kafka_namespace}') # Reset namespace to default switch_to_default_rook_cluster_project() self.ns_obj.wait_for_delete(resource_name=kafka_namespace)
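# Minimal end-to-end usage sketch (illustrative only, never called from this module):
# deploy the operator and a persistent Kafka cluster, drive messages through it with
# the hello-world producer/consumer pods, then tear everything down. The storage class
# name below is only an example and must match an existing RBD storage class.
def _example_amq_usage():
    amq = AMQ()
    amq.setup_amq_cluster(sc_name='ocs-storagecluster-ceph-rbd', size=100, replicas=3)
    amq.create_messaging_on_amq(topic_name='my-topic', user_name='my-user',
                                partitions=1, replicas=1,
                                num_of_producer_pods=1, num_of_consumer_pods=1,
                                value='10000')
    threads = amq.run_in_bg(value='10000')
    for thread in threads:
        thread.join(timeout=1800)
    amq.cleanup()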