def smallfile_run(self, es):
    """
    Run the smallfile workload so the elasticsearch server will have some
    data in it for copy

    Args:
        es (Elasticsearch): elasticsearch object

    Returns:
        str: the UUID of the test

    """
    ripsaw = RipSaw()

    # Loading the main template yaml file for the benchmark and update some
    # fields with new values
    sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

    # Setting up the parameters for this test
    sf_data["spec"]["elasticsearch"]["server"] = es.get_ip()
    sf_data["spec"]["elasticsearch"]["port"] = es.get_port()

    sf_data["spec"]["workload"]["args"]["samples"] = 1
    sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
    sf_data["spec"]["workload"]["args"]["file_size"] = 4
    sf_data["spec"]["workload"]["args"]["files"] = 500000
    sf_data["spec"]["workload"]["args"]["threads"] = 4
    sf_data["spec"]["workload"]["args"][
        "storageclass"
    ] = constants.DEFAULT_STORAGECLASS_RBD
    sf_data["spec"]["workload"]["args"]["storagesize"] = "100Gi"

    # Deploy the ripsaw operator
    log.info("Apply Operator CRD")
    ripsaw.apply_crd("resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")

    # deploy the smallfile workload
    log.info("Running SmallFile bench")
    sf_obj = OCS(**sf_data)
    sf_obj.create()

    # wait for benchmark pods to get created - takes a while
    for bench_pod in TimeoutSampler(
        240,
        10,
        get_pod_name_by_pattern,
        "smallfile-client",
        constants.RIPSAW_NAMESPACE,
    ):
        try:
            if bench_pod[0] is not None:
                small_file_client_pod = bench_pod[0]
                break
        except IndexError:
            log.info("Bench pod not ready yet")

    bench_pod = OCP(kind="pod", namespace=constants.RIPSAW_NAMESPACE)
    log.info("Waiting for SmallFile benchmark to Run")
    bench_pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        resource_name=small_file_client_pod,
        sleep=30,
        timeout=600,
    )
    for item in bench_pod.get()["items"][1]["spec"]["volumes"]:
        if "persistentVolumeClaim" in item:
            break
    uuid = ripsaw.get_uuid(small_file_client_pod)

    timeout = 600
    while timeout >= 0:
        logs = bench_pod.get_logs(name=small_file_client_pod)
        if "RUN STATUS DONE" in logs:
            break
        timeout -= 30
        if timeout == 0:
            raise TimeoutError("Timed out waiting for benchmark to complete")
        time.sleep(30)

    ripsaw.cleanup()
    return uuid
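
# Minimal usage sketch of smallfile_run() (illustrative only, not part of the
# original module). It assumes the caller is a test class that has already
# deployed the internal ElasticSearch helper used by the classes below:
#
#     es = ElasticSearch()
#     test_uuid = self.smallfile_run(es)
#     log.info(f"SmallFile data is indexed in ES under UUID {test_uuid}")
#     es.cleanup()
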
class TestFIOBenchmark(PASTest):
    """
    Run FIO perf test using ripsaw benchmark
    """

    def setup(self):
        """
        Setting up test parameters
        """
        log.info("Starting the test setup")
        self.benchmark_name = "FIO"
        self.client_pod_name = "fio-client"
        if config.PERF.get("deploy_internal_es"):
            self.es = ElasticSearch()
        else:
            if config.PERF.get("internal_es_server") == "":
                self.es = None
                return
            else:
                self.es = {
                    "server": config.PERF.get("internal_es_server"),
                    "port": config.PERF.get("internal_es_port"),
                    "url": f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
                    "parallel": True,
                }
            # verify that the connection to the elasticsearch server is OK
            if not super(TestFIOBenchmark, self).es_connect():
                self.es = None
                return

        super(TestFIOBenchmark, self).setup()
        # deploy the benchmark-operator (ripsaw)
        self.ripsaw = RipSaw()
        self.ripsaw_deploy(self.ripsaw)

    def setting_storage_usage(self):
        """
        Get the storage capacity, calculate the storage usage and set the
        workload CR file parameters.
        """

        # for development mode - use parameters for a short test run
        if self.dev_mode:
            log.info("Setting up parameters for development mode")
            self.crd_data["spec"]["workload"]["args"]["filesize"] = "1GiB"
            self.crd_data["spec"]["workload"]["args"]["storagesize"] = "5Gi"
            self.crd_data["spec"]["workload"]["args"]["servers"] = 2
            self.crd_data["spec"]["workload"]["args"]["samples"] = 2
            self.crd_data["spec"]["workload"]["args"]["read_runtime"] = 30
            self.crd_data["spec"]["workload"]["args"]["write_runtime"] = 30
            self.crd_data["spec"]["workload"]["args"]["bs"] = ["64KiB"]
            self.total_data_set = 20
            self.filesize = 3
            return

        ceph_cluster = CephCluster()
        ceph_capacity = ceph_cluster.get_ceph_capacity()
        log.info(f"Total storage capacity is {ceph_capacity} GiB")
        self.total_data_set = int(ceph_capacity * 0.4)
        self.filesize = int(
            self.crd_data["spec"]["workload"]["args"]["filesize"].replace("GiB", "")
        )
        # To make sure the number of App pods will not be more than 50, in case
        # of a large data set, change the size of the file each pod will work on
        if self.total_data_set > 500:
            self.filesize = int(ceph_capacity * 0.008)
            self.crd_data["spec"]["workload"]["args"][
                "filesize"
            ] = f"{self.filesize}GiB"
            # make sure that the storage size is larger than the file size
            self.crd_data["spec"]["workload"]["args"][
                "storagesize"
            ] = f"{int(self.filesize * 1.2)}Gi"
        self.crd_data["spec"]["workload"]["args"]["servers"] = int(
            self.total_data_set / self.filesize
        )
        log.info(f"Total Data set to work on is : {self.total_data_set} GiB")

    def setting_io_pattern(self, io_pattern):
        """
        Setting the test jobs according to the io pattern - random / sequential

        Args:
            io_pattern (str): the I/O pattern to run (random / sequential)

        """
        if io_pattern == "sequential":
            self.crd_data["spec"]["workload"]["args"]["jobs"] = ["write", "read"]
            self.crd_data["spec"]["workload"]["args"]["iodepth"] = 1
        if io_pattern == "random":
            self.crd_data["spec"]["workload"]["args"]["jobs"] = [
                "randwrite",
                "randread",
            ]

    def init_full_results(self, full_results):
        """
        Initialize the full results object which will be sent to the ES server

        Args:
            full_results (obj): an empty FIOResultsAnalyse object

        Returns:
            FIOResultsAnalyse (obj): the input object filled with data

        """
        for key in self.environment:
            full_results.add_key(key, self.environment[key])

        # Setting the global parameters of the test
        full_results.add_key("dataset", f"{self.total_data_set}GiB")
        full_results.add_key(
            "file_size", self.crd_data["spec"]["workload"]["args"]["filesize"]
        )
        full_results.add_key(
            "servers", self.crd_data["spec"]["workload"]["args"]["servers"]
        )
        full_results.add_key(
            "samples", self.crd_data["spec"]["workload"]["args"]["samples"]
        )
        full_results.add_key(
            "operations", self.crd_data["spec"]["workload"]["args"]["jobs"]
        )
        full_results.add_key(
            "block_sizes", self.crd_data["spec"]["workload"]["args"]["bs"]
        )
        full_results.add_key(
            "io_depth", self.crd_data["spec"]["workload"]["args"]["iodepth"]
        )
        full_results.add_key(
            "jobs", self.crd_data["spec"]["workload"]["args"]["numjobs"]
        )
        full_results.add_key(
            "runtime",
            {
                "read": self.crd_data["spec"]["workload"]["args"]["read_runtime"],
                "write": self.crd_data["spec"]["workload"]["args"]["write_runtime"],
            },
        )
        full_results.add_key(
            "storageclass", self.crd_data["spec"]["workload"]["args"]["storageclass"]
        )
        full_results.add_key(
            "vol_size", self.crd_data["spec"]["workload"]["args"]["storagesize"]
        )
        return full_results

    def cleanup(self):
        """
        Do cleanup in the benchmark-operator namespace.
        Delete the benchmark and make sure no PVCs and no PVs are left behind.
        """
        log.info("Deleting FIO benchmark")
        self.benchmark_obj.delete()
        time.sleep(180)

        # Getting all PVCs created in the test (if left).
        NL = "\\n"  # NewLine character
        command = ["oc", "get", "pvc", "-n"]
        command.append(constants.RIPSAW_NAMESPACE)
        command.append("-o")
        command.append("template")
        command.append("--template")
        command.append("'{{range .items}}{{.metadata.name}}{{\"" + NL + "\"}}{{end}}'")
        pvcs_list = run_command(command, out_format="list")
        log.info(f"list of all PVCs :{pvcs_list}")
        for pvc in pvcs_list:
            pvc = pvc.replace("'", "")
            run_command(f"oc -n {constants.RIPSAW_NAMESPACE} delete pvc {pvc}")

        # Getting all PVs created in the test (if left).
        command[2] = "pv"
        command[8] = (
            "'{{range .items}}{{.metadata.name}} {{.spec.claimRef.namespace}}{{\""
            + NL
            + "\"}}{{end}}'"
        )
        command.remove("-n")
        command.remove(constants.RIPSAW_NAMESPACE)
        pvs_list = run_command(command, out_format="list")
        log.info(f"list of all PVs :{pvs_list}")

        for line in pvs_list:
            try:
                pv, ns = line.split(" ")
                pv = pv.replace("'", "")
                if ns == constants.RIPSAW_NAMESPACE:
                    log.info(f"Going to delete {pv}")
                    run_command(f"oc delete pv {pv}")
            except Exception:
                pass

    def run(self):
        """
        Run the test, and wait until it is finished
        """
        self.deploy_and_wait_for_wl_to_start(timeout=900)
        # Getting the UUID from inside the benchmark pod
        self.uuid = self.ripsaw.get_uuid(self.client_pod)
        # Setting back the original elastic-search information
        if hasattr(self, "backup_es"):
            self.crd_data["spec"]["elasticsearch"] = self.backup_es
        if self.dev_mode:
            sleeptime = 30
        else:
            sleeptime = 300

        self.wait_for_wl_to_finish(sleep=sleeptime)

        try:
            if "Fio failed to execute" not in self.test_logs:
                log.info("FIO has completed successfully")
        except IOError:
            log.warning("FIO failed to complete")

    def teardown(self):
        """
        The teardown of the test environment at the end of the test.
        """
        log.info("cleanup the environment")
        if hasattr(self, "ripsaw"):
            self.ripsaw.cleanup()
        if isinstance(self.es, ElasticSearch):
            self.es.cleanup()
        sleep_time = 5
        log.info(
            f"Going to sleep for {sleep_time} Minutes, for background cleanup to complete"
        )
        time.sleep(sleep_time * 60)

    @pytest.mark.parametrize(
        argnames=["interface", "io_pattern"],
        argvalues=[
            pytest.param(
                *[constants.CEPHBLOCKPOOL, "sequential"],
                marks=pytest.mark.polarion_id("OCS-844"),
            ),
            pytest.param(
                *[constants.CEPHFILESYSTEM, "sequential"],
                marks=pytest.mark.polarion_id("OCS-845"),
            ),
            pytest.param(
                *[constants.CEPHBLOCKPOOL, "random"],
                marks=pytest.mark.polarion_id("OCS-846"),
            ),
            pytest.param(
                *[constants.CEPHFILESYSTEM, "random"],
                marks=pytest.mark.polarion_id("OCS-847"),
            ),
        ],
    )
    def test_fio_workload_simple(self, interface, io_pattern):
        """
        This is a basic fio perf test - non-compressed volumes

        Args:
            interface (str): the interface that needs to be tested - CephFS / RBD
            io_pattern (str): the I/O pattern to run - random / sequential

        """
        # verify that there is an elasticsearch server for the benchmark
        if not self.es:
            log.error("This test must have an Elasticsearch server")
            return False

        # Getting the full path for the test logs
        self.full_log_path = get_full_test_logs_path(cname=self)
        self.full_log_path += f"-{interface}-{io_pattern}"
        log.info(f"Logs file path name is : {self.full_log_path}")

        log.info("Create resource file for fio workload")
        self.crd_data = templating.load_yaml(constants.FIO_CR_YAML)

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.es_info_backup(self.es)

        self.set_storageclass(interface=interface)

        # Setting the data set to 40% of the total storage capacity
        self.setting_storage_usage()

        self.get_env_info()

        self.setting_io_pattern(io_pattern)

        self.run()

        # Initialize the results doc file.
        full_results = self.init_full_results(
            FIOResultsAnalyse(
                self.uuid, self.crd_data, self.full_log_path, self.main_es
            )
        )

        # Setting the global parameters of the test
        full_results.add_key("io_pattern", io_pattern)

        # Clean up fio benchmark
        self.cleanup()

        log.debug(f"Full results is : {full_results.results}")
        if isinstance(self.es, ElasticSearch):
            # Using internal deployed elasticsearch
            log.info("Getting data from internal ES")
            if self.main_es:
                self.copy_es_data(self.es)
            else:
                log.info("Dumping data from the Internal ES to tar ball file")
                self.es.dumping_all_data(self.full_log_path)

        full_results.analyze_results(self)  # Analyze the results
        full_results.add_key(
            "test_time", {"start": self.start_time, "end": self.end_time}
        )

        # Writing the analyzed test results to the Elastic-Search server
        if full_results.es_write():
            log.info(f"The Result can be found at : {full_results.results_link()}")

    @skipif_ocs_version("<4.6")
    @pytest.mark.parametrize(
        argnames=["io_pattern", "bs", "cmp_ratio"],
        argvalues=[
            pytest.param(*["random", "1024KiB", 60]),
            pytest.param(*["random", "64KiB", 60]),
            pytest.param(*["random", "16KiB", 60]),
            pytest.param(*["sequential", "1024KiB", 60]),
            pytest.param(*["sequential", "64KiB", 60]),
            pytest.param(*["sequential", "16KiB", 60]),
        ],
    )
    def test_fio_compressed_workload(
        self, storageclass_factory, io_pattern, bs, cmp_ratio
    ):
        """
        This is a basic fio perf test which runs on a compression-enabled volume

        Args:
            io_pattern (str): the I/O pattern to run - random / sequential
            bs (str): block size to use in the test
            cmp_ratio (int): the expected compression ratio

        """
        # Getting the full path for the test logs
        self.full_log_path = get_full_test_logs_path(cname=self)
        self.full_log_path += f"-{io_pattern}-{bs}-{cmp_ratio}"
        log.info(f"Logs file path name is : {self.full_log_path}")

        log.info("Create resource file for fio workload")
        self.crd_data = templating.load_yaml(
            "ocs_ci/templates/workloads/fio/benchmark_fio_cmp.yaml"
        )

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.es_info_backup(self.es)

        log.info("Creating compressed pool & SC")
        sc_obj = storageclass_factory(
            interface=constants.CEPHBLOCKPOOL,
            new_rbd_pool=True,
            replica=3,
            compression="aggressive",
        )

        sc = sc_obj.name
        pool_name = run_cmd(f"oc get sc {sc} -o jsonpath={{'.parameters.pool'}}")

        # Create fio benchmark
        self.crd_data["spec"]["workload"]["args"]["bs"] = [bs]
        self.crd_data["spec"]["workload"]["args"]["cmp_ratio"] = cmp_ratio

        # Setting the data set to 40% of the total storage capacity
        self.setting_storage_usage()
        self.crd_data["spec"]["workload"]["args"]["prefill_bs"] = self.crd_data[
            "spec"
        ]["workload"]["args"]["bs"][0]

        self.get_env_info()

        self.crd_data["spec"]["workload"]["args"]["storageclass"] = sc
        self.setting_io_pattern(io_pattern)
        self.run()

        # Initialize the results doc file.
        full_results = self.init_full_results(
            FIOResultsAnalyse(
                self.uuid, self.crd_data, self.full_log_path, self.main_es
            )
        )

        # Setting the global parameters of the test
        full_results.add_key("io_pattern", io_pattern)

        if isinstance(self.es, ElasticSearch):
            # Using internal deployed elasticsearch
            log.info("Getting data from internal ES")
            if self.main_es:
                self.copy_es_data(self.es)
            else:
                log.info("Dumping data from the Internal ES to tar ball file")
                self.es.dumping_all_data(self.full_log_path)

        log.info("verifying compression ratio")
        ratio = calculate_compression_ratio(pool_name)

        full_results.add_key("cmp_ratio", {"expected": cmp_ratio, "actual": ratio})

        log.debug(f"Full results is : {full_results.results}")
        full_results.analyze_results(self)  # Analyze the results
        if (cmp_ratio + 5) < ratio or ratio < (cmp_ratio - 5):
            log.warning(
                f"The compression ratio is {ratio}% "
                f"while the expected ratio is {cmp_ratio}%"
            )
        else:
            log.info(f"The compression ratio is {ratio}%")
        full_results.add_key(
            "test_time", {"start": self.start_time, "end": self.end_time}
        )

        # Writing the analyzed test results to the Elastic-Search server
        if full_results.es_write():
            log.info(f"The Result can be found at : {full_results.results_link()}")

        # Clean up fio benchmark
        self.cleanup()
        sc_obj.delete()
        sc_obj.ocp.wait_for_delete(resource_name=sc, timeout=300, sleep=5)
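

# Illustrative sketch (not part of the original test file) of the
# capacity-driven sizing performed by TestFIOBenchmark.setting_storage_usage().
# The argument defaults below are arbitrary example inputs, not the real CR
# defaults.
def _example_fio_sizing(ceph_capacity_gib=10000, cr_filesize_gib=2, cr_storagesize_gi=5):
    """Return the (filesize GiB, storagesize Gi, servers) values the CR would get."""
    total_data_set = int(ceph_capacity_gib * 0.4)  # data set is 40% of raw capacity
    filesize, storagesize = cr_filesize_gib, cr_storagesize_gi
    if total_data_set > 500:
        # on large clusters, grow the per-pod file so the number of fio
        # client pods stays around 50
        filesize = int(ceph_capacity_gib * 0.008)
        storagesize = int(filesize * 1.2)  # PVC slightly larger than the file
    servers = int(total_data_set / filesize)
    return filesize, storagesize, servers


# e.g. _example_fio_sizing(10000, 2, 5) -> (80, 96, 50) for a 10 TiB cluster
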
class TestSmallFileWorkload(PASTest):
    """
    Deploy Ripsaw operator and run SmallFile workload
    SmallFile workload using https://github.com/distributed-system-analysis/smallfile
    smallfile is a python-based distributed POSIX workload generator which can be
    used to quickly measure performance for a variety of metadata-intensive
    workloads
    """

    def setup(self):
        """
        Setting up test parameters
        """
        log.info("Starting the test setup")
        self.benchmark_name = "SmallFiles"
        self.client_pod_name = "smallfile-client"
        if config.PERF.get("deploy_internal_es"):
            self.es = ElasticSearch()
        else:
            if config.PERF.get("internal_es_server") == "":
                self.es = None
                return
            else:
                self.es = {
                    "server": config.PERF.get("internal_es_server"),
                    "port": config.PERF.get("internal_es_port"),
                    "url": f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
                }
            # verify that the connection to the elasticsearch server is OK
            if not super(TestSmallFileWorkload, self).es_connect():
                self.es = None
                return

        super(TestSmallFileWorkload, self).setup()
        # deploy the benchmark-operator (ripsaw)
        self.ripsaw = RipSaw()
        self.ripsaw_deploy(self.ripsaw)

    def setting_storage_usage(self, file_size, files, threads, samples):
        """
        Get the storage capacity, calculate the storage usage and set the
        workload CR file parameters.

        Args:
            file_size (int) : the size (in KB) of the files to be used
            files (int) : number of files to use
            threads (int) : number of threads to be used in the test
            samples (int) : how many samples to run for each test

        """
        self.crd_data["spec"]["workload"]["args"]["file_size"] = file_size
        self.crd_data["spec"]["workload"]["args"]["files"] = files
        self.crd_data["spec"]["workload"]["args"]["threads"] = threads
        self.crd_data["spec"]["workload"]["args"]["samples"] = samples

        # Calculating the size of the volume to be tested - it should be at
        # least three times the size of the working data set, and at least
        # 100Gi. Since file_size is in KB and vol_size needs to be in GB,
        # more calculation is needed.
        vol_size = int(files * threads * file_size * 3)
        vol_size = int(vol_size / constants.GB2KB)
        if vol_size < 100:
            vol_size = 100
        self.crd_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

    def init_full_results(self, full_results):
        """
        Initialize the full results object which will be sent to the ES server

        Args:
            full_results (obj): an empty SmallFileResultsAnalyse object

        Returns:
            SmallFileResultsAnalyse (obj): the input object filled with data

        """
        for key in self.environment:
            full_results.add_key(key, self.environment[key])

        # Calculating the total size of the working data set - in GB
        full_results.add_key(
            "dataset",
            self.crd_data["spec"]["workload"]["args"]["file_size"]
            * self.crd_data["spec"]["workload"]["args"]["files"]
            * self.crd_data["spec"]["workload"]["args"]["threads"]
            * full_results.results["clients"]
            / constants.GB2KB,
        )

        full_results.add_key(
            "global_options",
            {
                "files": self.crd_data["spec"]["workload"]["args"]["files"],
                "file_size": self.crd_data["spec"]["workload"]["args"]["file_size"],
                "storageclass": self.crd_data["spec"]["workload"]["args"][
                    "storageclass"
                ],
                "vol_size": self.crd_data["spec"]["workload"]["args"]["storagesize"],
            },
        )
        return full_results

    def run(self):
        """
        Run the smallfile workload and wait until it is finished.

        Returns:
            bool: True if the benchmark completed successfully, otherwise False

        """
        log.info("Running SmallFile bench")
        self.deploy_and_wait_for_wl_to_start(timeout=240, sleep=10)
        # Getting the UUID from inside the benchmark pod
        self.uuid = self.ripsaw.get_uuid(self.client_pod)
        self.wait_for_wl_to_finish(sleep=30)
        try:
            if "RUN STATUS DONE" in self.test_logs:
                log.info("SmallFiles has completed successfully")
                return True
        except IOError:
            log.warning("SmallFiles failed to complete")
            return False

    def teardown(self):
        """
        The teardown of the test environment at the end of the test.
        """
        log.info("cleanup the environment")
        if hasattr(self, "ripsaw"):
            self.ripsaw.cleanup()
        if isinstance(self.es, ElasticSearch):
            self.es.cleanup()
        sleep_time = 5
        log.info(
            f"Going to sleep for {sleep_time} Minutes, for background cleanup to complete"
        )
        time.sleep(sleep_time * 60)

    @pytest.mark.parametrize(
        argnames=["file_size", "files", "threads", "samples", "interface"],
        argvalues=[
            pytest.param(
                *[4, 50000, 4, 3, constants.CEPHBLOCKPOOL],
                marks=pytest.mark.polarion_id("OCS-1295"),
            ),
            pytest.param(
                *[16, 50000, 4, 3, constants.CEPHBLOCKPOOL],
                marks=pytest.mark.polarion_id("OCS-2020"),
            ),
            pytest.param(
                *[16, 200000, 4, 3, constants.CEPHBLOCKPOOL],
                marks=pytest.mark.polarion_id("OCS-2021"),
            ),
            pytest.param(
                *[4, 50000, 4, 3, constants.CEPHFILESYSTEM],
                marks=pytest.mark.polarion_id("OCS-2022"),
            ),
            pytest.param(
                *[16, 50000, 4, 3, constants.CEPHFILESYSTEM],
                marks=pytest.mark.polarion_id("OCS-2023"),
            ),
        ],
    )
    @pytest.mark.polarion_id("OCS-1295")
    def test_smallfile_workload(self, file_size, files, threads, samples, interface):
        """
        Run SmallFile Workload

        Args:
            file_size (int) : the size (in KB) of the files to be used
            files (int) : number of files to use
            threads (int) : number of threads to be used in the test
            samples (int) : how many samples to run for each test
            interface (str) : the volume type (rbd / cephfs)

        """
        # verify that there is an elasticsearch server for the benchmark
        if not self.es:
            log.error("This test must have an Elasticsearch server")
            return False

        # Getting the full path for the test logs
        self.full_log_path = get_full_test_logs_path(cname=self)
        self.full_log_path += f"-{file_size}-{files}-{threads}-{samples}-{interface}"
        log.info(f"Logs file path name is : {self.full_log_path}")

        # Loading the main template yaml file for the benchmark
        log.info("Create resource file for smallfiles workload")
        self.crd_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.es_info_backup(self.es)

        self.set_storageclass(interface=interface)

        # Setting the workload file parameters and the PVC size
        self.setting_storage_usage(file_size, files, threads, samples)

        self.get_env_info()

        if not self.run():
            log.error("The benchmark failed to run !")
            return

        # Setting back the original elastic-search information
        if self.backup_es:
            self.crd_data["spec"]["elasticsearch"] = self.backup_es

        # Initialize the results doc file.
        full_results = self.init_full_results(
            SmallFileResultsAnalyse(
                self.uuid, self.crd_data, self.full_log_path, self.main_es
            )
        )

        log.info(f"Full results is : {full_results.results}")
        if isinstance(self.es, ElasticSearch):
            # Using internal deployed elasticsearch
            log.info("Getting data from internal ES")
            if self.main_es:
                self.copy_es_data(self.es)
                full_results.read()
            else:
                log.info("Dumping data from the Internal ES to tar ball file")
                self.es.dumping_all_data(self.full_log_path)
        else:
            log.info(self.es)
            self.es = Elasticsearch(
                hosts=[{"host": self.es["server"], "port": self.es["port"]}]
            )
            full_results.read()

        full_results.add_key(
            "test_time", {"start": self.start_time, "end": self.end_time}
        )

        if self.main_es:
            full_results.es = self.main_es

        if not full_results.dont_check:
            full_results.add_key("hosts", full_results.get_clients_list())
            full_results.init_full_results()
            full_results.aggregate_host_results()
            test_status = full_results.aggregate_samples_results()
            full_results.all_results = None

            # Writing the analyzed test results to the Elastic-Search server
            if full_results.es_write():
                log.info(
                    f"The Result can be found at : {full_results.results_link()}"
                )
        else:
            test_status = True

        assert test_status, "Test Failed !"
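

# Illustrative sketch (not part of the original test file) of the PVC sizing
# performed by TestSmallFileWorkload.setting_storage_usage(): the volume must
# hold roughly three times the working data set, with a 100Gi floor.
def _example_smallfile_vol_size(file_size_kb, files, threads, gb2kb=constants.GB2KB):
    """Return the storagesize (in Gi) that the workload CR would get."""
    vol_size = int(files * threads * file_size_kb * 3)  # working set in KB, times 3
    vol_size = int(vol_size / gb2kb)  # convert KB to GB
    return max(vol_size, 100)  # never go below 100 (Gi)


# e.g. _example_smallfile_vol_size(16, 200000, 4) -> 100, since even the
# largest parametrized case above stays under the 100Gi floor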