def setup(self):
    """
    Prepare the test object before each performance / scale test.

    Resets the per-test placeholders, creates a pod accessor bound to the
    benchmark-operator namespace and stores the environment details
    (environment info and cluster ID) on the instance. It then calls the
    OSD and master / worker node information collectors.
    """
    log.info("Setting up test environment")

    # Placeholders, all initialised to None:
    #   crd_data      - Benchmark CRD data
    #   es            - in-cluster elasticsearch deployment
    #   es_backup     - elasticsearch backup
    #   main_es       - main elasticsearch object
    #   benchmark_obj - benchmark object
    #   client_pod    - client pod object
    self.crd_data = self.es = self.es_backup = None
    self.main_es = self.benchmark_obj = self.client_pod = None

    cli_params = config.RUN["cli_params"]
    self.dev_mode = cli_params.get("dev_mode")
    self.pod_obj = OCP(kind="pod", namespace=benchmark_operator.BMO_NAME)

    # Placeholders for the test results file (all sub-tests together)
    self.results_path = self.results_file = ""

    # Software & hardware environment configuration for the performance report
    self.environment = get_environment_info()
    self.environment.update(clusterID=get_running_cluster_id())

    self.get_osd_info()
    for node_type in ("master", "worker"):
        self.get_node_info(node_type=node_type)
def setup(self):
    """
    Prepare the test object before each performance / scale test.

    Resets the per-test placeholders, initializes the test CRD, generates a
    UUID for the results, derives the log / results paths from the
    ``PYTEST_CURRENT_TEST`` environment variable and stores the environment
    and Ceph details on the instance for the performance report.
    """
    log.info("Setting up test environment")

    # Placeholders, all initialised to None:
    #   es            - in-cluster elasticsearch deployment
    #   es_backup     - elasticsearch backup
    #   main_es       - main elasticsearch object
    #   benchmark_obj - benchmark object
    #   client_pod    - client pod object
    self.es = self.es_backup = self.main_es = None
    self.benchmark_obj = self.client_pod = None

    cli_params = config.RUN["cli_params"]
    self.dev_mode = cli_params.get("dev_mode")
    self.pod_obj = OCP(kind="pod", namespace=benchmark_operator.BMO_NAME)
    self.initialize_test_crd()

    # Placeholder for the test results file (all sub-tests together)
    self.results_file = ""

    # Every test needs a uuid for the ES results; the benchmark-operator
    # base test overrides it with the uuid pulled from the benchmark pod.
    self.uuid = uuid4().hex

    # Build the full path for the test logs out of the pytest test id:
    # keep the part before the first space, turn "::" into "/" and
    # "[" into "-", and drop "]".
    test_id = os.environ.get("PYTEST_CURRENT_TEST").split(" ")[0]
    for old, new in (("::", "/"), ("[", "-"), ("]", "")):
        test_id = test_id.replace(old, new)
    self.full_log_path = os.path.join(ocsci_log_path(), test_id)
    log.info(f"Logs file path name is : {self.full_log_path}")

    # The results path is kept as a list of path components, without the
    # last component of the log path.
    self.results_path = self.full_log_path.split("/")[:-1]

    # List of test(s) for checking the results
    self.workloads = []

    # Software & hardware environment configuration for the performance report
    self.environment = get_environment_info()
    self.environment.update(clusterID=get_running_cluster_id())

    self.ceph_cluster = CephCluster()
    self.used_capacity = self.get_cephfs_data()

    self.get_osd_info()
    for node_type in ("master", "worker"):
        self.get_node_info(node_type=node_type)
def setup(self):
    """
    Prepare the test object before each performance / scale test.

    Resets the per-test placeholders, reads the ``dev_mode`` CLI parameter,
    stores the environment information and creates a pod accessor.
    """
    log.info("Setting up test environment")

    # Placeholders, all initialised to None:
    #   crd_data      - Benchmark CRD data
    #   es_backup     - elasticsearch backup
    #   main_es       - main elasticsearch object
    #   benchmark_obj - benchmark object
    #   client_pod    - client pod object
    self.crd_data = self.es_backup = self.main_es = None
    self.benchmark_obj = self.client_pod = None

    cli_params = config.RUN["cli_params"]
    self.dev_mode = cli_params.get("dev_mode")

    self.environment = get_environment_info()
    self.pod_obj = OCP(kind="pod")
def setup(self):
    """
    Prepare the test object before each performance / scale test.

    Resets the per-test placeholders, creates a pod accessor bound to the
    benchmark-operator namespace, derives the log path from the
    ``PYTEST_CURRENT_TEST`` environment variable and stores the environment
    and Ceph details on the instance for the performance report.
    """
    log.info("Setting up test environment")

    # Placeholders, all initialised to None:
    #   crd_data      - Benchmark CRD data
    #   es            - in-cluster elasticsearch deployment
    #   es_backup     - elasticsearch backup
    #   main_es       - main elasticsearch object
    #   benchmark_obj - benchmark object
    #   client_pod    - client pod object
    self.crd_data = self.es = self.es_backup = None
    self.main_es = self.benchmark_obj = self.client_pod = None

    cli_params = config.RUN["cli_params"]
    self.dev_mode = cli_params.get("dev_mode")
    self.pod_obj = OCP(kind="pod", namespace=benchmark_operator.BMO_NAME)

    # Placeholders for the test results file (all sub-tests together)
    self.results_path = self.results_file = ""

    # Build the path for the test logs out of the pytest test id:
    # keep the part before the first "]", then turn "::" into "/" and
    # "[" into "-".
    test_id = os.environ.get("PYTEST_CURRENT_TEST").split("]")[0]
    for old, new in (("::", "/"), ("[", "-")):
        test_id = test_id.replace(old, new)
    self.full_log_path = test_id
    log.info(f"Logs file path name is : {self.full_log_path}")

    # Software & hardware environment configuration for the performance report
    self.environment = get_environment_info()
    self.environment.update(clusterID=get_running_cluster_id())

    self.ceph_cluster = CephCluster()
    self.used_capacity = self.get_cephfs_data()

    self.get_osd_info()
    for node_type in ("master", "worker"):
        self.get_node_info(node_type=node_type)
def setup(self):
    """
    Prepare the test object before each performance / scale test.

    Resets the per-test placeholders, creates a pod accessor bound to the
    benchmark-operator namespace and stores the environment details
    (environment info and cluster ID) on the instance. It then calls the
    OSD and master / worker node information collectors.
    """
    log.info("Setting up test environment")

    # Placeholders, all initialised to None:
    #   crd_data      - Benchmark CRD data
    #   es_backup     - elasticsearch backup
    #   main_es       - main elasticsearch object
    #   benchmark_obj - benchmark object
    #   client_pod    - client pod object
    self.crd_data = self.es_backup = self.main_es = None
    self.benchmark_obj = self.client_pod = None

    cli_params = config.RUN["cli_params"]
    self.dev_mode = cli_params.get("dev_mode")
    self.pod_obj = OCP(kind="pod", namespace=benchmark_operator.BMO_NAME)

    # Software & hardware environment configuration for the performance report
    self.environment = get_environment_info()
    self.environment.update(clusterID=get_running_cluster_id())

    self.get_osd_info()
    for node_type in ("master", "worker"):
        self.get_node_info(node_type=node_type)
def smallfile_workload(ripsaw, es, file_size, files, threads, samples, interface):
    """
    Run SmallFile Workload

    SmallFile workload uses https://github.com/distributed-system-analysis/smallfile

    smallfile is a python-based distributed POSIX workload generator which can
    be used to quickly measure performance and scaling for a variety of
    metadata-intensive workloads

    Args:
        ripsaw: Ripsaw fixture to setup/teardown ripsaw
        es: Elastic search fixture (may be falsy when no internal ES is used)
        file_size (int): size of file to be created - in KB
        files (int): number of files to be created
        threads (int): number of threads to run
        samples (int): samples taken if running performance tests
        interface (str): CephFileSystem or CephBlockPool

    Returns:
        tuple: (backup_es, environment, sf_data) where
            backup_es (dict or None) is the elasticsearch section found in
                the YAML file (with a ``url`` key added), or None when the
                YAML file has no such section,
            environment (dict) is the environment info (user is changed at
                times),
            sf_data (dict) is the small file data extracted from the YAML
                file and modified by this routine.

    """
    # Loading the main template yaml file for the benchmark
    sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)
    spec = sf_data["spec"]

    # Saving the original elastic-search IP and PORT - if defined in yaml.
    # backup_es is initialised up front so the final ``return`` cannot hit an
    # unbound local when the YAML file has no elasticsearch section.
    backup_es = None
    if "elasticsearch" in spec:
        yaml_es = spec["elasticsearch"]
        yaml_es["url"] = f"http://{yaml_es['server']}:{yaml_es['port']}"
        backup_es = yaml_es
    else:
        log.warning("Elastic Search information does not exists in YAML file")
        spec["elasticsearch"] = {}

    # Use the internal define elastic-search server in the test - if exist
    if es:
        spec["elasticsearch"] = {
            "url": f"http://{es.get_ip()}:{es.get_port()}",
            "server": es.get_ip(),
            "port": es.get_port(),
        }

    log.info("Apply Operator CRD")
    ripsaw.apply_crd("resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")

    if interface == constants.CEPHBLOCKPOOL:
        storageclass = constants.DEFAULT_STORAGECLASS_RBD
    else:
        storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
    log.info(f"Using {storageclass} Storageclass")

    workload_args = spec["workload"]["args"]
    workload_args["storageclass"] = storageclass
    log.info("Running SmallFile bench")

    # Setting up the parameters for this test
    workload_args["file_size"] = file_size
    workload_args["files"] = files
    workload_args["threads"] = threads
    workload_args["samples"] = samples

    # The volume is sized at three times the data written
    # (files * threads * file_size), with a floor of 100Gi.
    # file_size is in KB and the volume size is in GB, hence the conversion.
    vol_size = int(files * threads * file_size * 3)
    vol_size = int(vol_size / constants.GB2KB)
    if vol_size < 100:
        vol_size = 100
    workload_args["storagesize"] = f"{vol_size}Gi"

    environment = get_environment_info()
    if environment["user"] != "":
        spec["test_user"] = environment["user"]
    else:
        # since full results object need this parameter, initialize it from CR file
        environment["user"] = spec["test_user"]
    spec["clustername"] = environment["clustername"]

    sf_obj = OCS(**sf_data)
    sf_obj.create()
    log.info(f"The smallfile yaml file is {sf_data}")

    # wait for benchmark pods to get created - takes a while
    for pod_names in TimeoutSampler(
        240,
        10,
        get_pod_name_by_pattern,
        "smallfile-client",
        constants.RIPSAW_NAMESPACE,
    ):
        try:
            if pod_names[0] is not None:
                small_file_client_pod = pod_names[0]
                break
        except IndexError:
            log.info("Bench pod not ready yet")

    bench_pod = OCP(kind="pod", namespace=constants.RIPSAW_NAMESPACE)
    log.info("Waiting for SmallFile benchmark to Run")
    assert bench_pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        resource_name=small_file_client_pod,
        sleep=30,
        timeout=600,
    )

    return backup_es, environment, sf_data
def test_pvc_snapshot_performance_multiple_files(
    self, file_size, files, threads, interface
):
    """
    Run SmallFile Workload and the take snapshot.
    test will run with 1M of file on the volume - total data set
    is the same for all tests, ~30GiB, and then take snapshot and measure
    the time it takes.
    the test will run 3 time to check consistency.

    Args:
        file_size (int): the size of the file to be create - in KiB
        files (int): number of files each thread will create
        threads (int): number of threads will be used in the workload
        interface (str): the volume interface that will be used
                         CephBlockPool / CephFileSystem

    Raises:
        TimeoutError : in case of creation files take too long time
                       more then 2 Hours
        AssertionError : if the benchmark pod does not reach Running state
                         or no PVC is found on the benchmark pod

    """
    # Deploying elastic-search server in the cluster for use by the
    # SmallFiles workload, since it is mandatory for the workload.
    # This is deployed once for all test iterations and will be deleted
    # in the end of the test.
    self.es = ElasticSearch()

    # Loading the main template yaml file for the benchmark and update some
    # fields with new values
    sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

    if interface == constants.CEPHBLOCKPOOL:
        storageclass = constants.DEFAULT_STORAGECLASS_RBD
    else:
        storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
    log.info(f"Using {storageclass} Storageclass")

    # Setting up the parameters for this test
    workload_args = sf_data["spec"]["workload"]["args"]
    workload_args["samples"] = 1
    workload_args["operation"] = ["create"]
    workload_args["file_size"] = file_size
    workload_args["files"] = files
    workload_args["threads"] = threads
    workload_args["storageclass"] = storageclass
    sf_data["spec"]["elasticsearch"] = {
        "url": f"http://{self.es.get_ip()}:{self.es.get_port()}"
    }

    # The volume is sized at three times the total data written, with a
    # floor of 100Gi. file_size is in KB and the volume size is in GB,
    # hence the conversion.
    total_files = int(files * threads)
    total_data = int(files * threads * file_size / constants.GB2KB)
    data_set = int(total_data * 3)  # calculate data with replica
    vol_size = data_set if data_set >= 100 else 100
    workload_args["storagesize"] = f"{vol_size}Gi"

    environment = get_environment_info()
    if environment["user"] != "":
        sf_data["spec"]["test_user"] = environment["user"]
    else:
        # since full results object need this parameter, initialize it from CR file
        environment["user"] = sf_data["spec"]["test_user"]
    sf_data["spec"]["clustername"] = environment["clustername"]
    log.debug(f"The smallfile yaml file is {sf_data}")

    # Deploy the benchmark-operator, so we can use the SmallFiles workload
    # to fill up the volume with files, and switch to the benchmark-operator namespace.
    log.info("Deploy the benchmark-operator")
    self.deploy_benchmark_operator()
    switch_to_project(BMO_NAME)

    all_results = []

    self.results_path = get_full_test_logs_path(cname=self)
    log.info(f"Logs file path name is : {self.full_log_path}")

    # Produce ES report

    # Collecting environment information
    self.get_env_info()

    # Initialize the results doc file.
    self.full_results = self.init_full_results(
        ResultsAnalyse(
            self.uuid,
            self.crd_data,
            self.full_log_path,
            "pvc_snapshot_perf_multiple_files",
        )
    )
    self.full_results.add_key("file_size_inKB", file_size)
    self.full_results.add_key("threads", threads)
    self.full_results.add_key("interface", interface)

    for test_num in range(self.tests_numbers):
        test_results = {"creation_time": None, "csi_creation_time": None}

        # deploy the smallfile workload
        log.info("Running SmallFile bench")
        sf_obj = OCS(**sf_data)
        sf_obj.create()

        # wait for benchmark pods to get created - takes a while
        for pod_names in TimeoutSampler(
            240,
            10,
            get_pod_name_by_pattern,
            "smallfile-client",
            BMO_NAME,
        ):
            try:
                if pod_names[0] is not None:
                    small_file_client_pod = pod_names[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        bench_pod = OCP(kind="pod", namespace=BMO_NAME)
        log.info("Waiting for SmallFile benchmark to Run")
        assert bench_pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_name=small_file_client_pod,
            sleep=30,
            timeout=600,
        )

        # Find the PVC used by the benchmark client pod (first
        # persistentVolumeClaim volume of the pod with the matching name).
        pvc_name = ""
        for item in bench_pod.get()["items"]:
            if item.get("metadata").get("name") != small_file_client_pod:
                continue
            for volume in item.get("spec").get("volumes"):
                if "persistentVolumeClaim" in volume:
                    pvc_name = volume["persistentVolumeClaim"]["claimName"]
                    break
            break
        # Fail here with a clear message instead of trying to snapshot a
        # PVC with an empty name.
        assert pvc_name, (
            f"No PVC was found on the benchmark pod {small_file_client_pod}"
        )
        log.info(f"Benchmark PVC name is : {pvc_name}")

        # Creation of 1M files on CephFS can take a lot of time
        timeout = 7200
        while timeout >= 0:
            logs = bench_pod.get_logs(name=small_file_client_pod)
            if "RUN STATUS DONE" in logs:
                break
            timeout -= 30
            if timeout == 0:
                raise TimeoutError("Timed out waiting for benchmark to complete")
            time.sleep(30)
        log.info(f"Smallfile test ({test_num + 1}) finished.")

        # Taking snapshot of the PVC (which contain files)
        snap_name = pvc_name.replace("claim", "snapshot-")
        log.info(f"Taking snapshot of the PVC {pvc_name}")
        log.info(f"Snapshot name : {snap_name}")

        # Timezone-aware replacement for the deprecated utcnow(); the
        # formatted string is the same.
        start_time = datetime.datetime.now(datetime.timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%SZ"
        )

        test_results["creation_time"] = self.measure_create_snapshot_time(
            pvc_name=pvc_name,
            snap_name=snap_name,
            namespace=BMO_NAME,
            interface=interface,
            start_time=start_time,
        )
        log.info(
            f"Snapshot with name {snap_name} and id {self.snap_uid} creation time is"
            f' {test_results["creation_time"]} seconds'
        )

        test_results[
            "csi_creation_time"
        ] = performance_lib.measure_csi_snapshot_creation_time(
            interface=interface, snapshot_id=self.snap_uid, start_time=start_time
        )
        log.info(
            f"Snapshot with name {snap_name} and id {self.snap_uid} csi creation time is"
            f' {test_results["csi_creation_time"]} seconds'
        )

        all_results.append(test_results)

        # Delete the smallfile workload - which will delete also the PVC
        log.info("Deleting the smallfile workload")
        if sf_obj.delete(wait=True):
            log.info("The smallfile workload was deleted successfully")

        # Delete VolumeSnapshots
        log.info("Deleting the snapshots")
        if self.snap_obj.delete(wait=True):
            log.info("The snapshot deleted successfully")
        log.info("Verify (and wait if needed) that ceph health is OK")
        ceph_health_check(tries=45, delay=60)

        # Sleep for 1 Min. between test samples
        time.sleep(60)

    # Cleanup the elasticsearch instance.
    log.info("Deleting the elastic-search instance")
    self.es.cleanup()

    creation_times = [t["creation_time"] for t in all_results]
    avg_c_time = statistics.mean(creation_times)
    csi_creation_times = [t["csi_creation_time"] for t in all_results]
    avg_csi_c_time = statistics.mean(csi_creation_times)

    # data_set includes the x3 factor; report the data set without it
    t_dateset = int(data_set / 3)

    log.info(f"Full test report for {interface}:")
    log.info(
        f"Test ran {self.tests_numbers} times, "
        f"All snapshot creation results are {creation_times} seconds"
    )
    log.info(f"The average snapshot creation time is : {avg_c_time} seconds")
    log.info(
        f"Test ran {self.tests_numbers} times, "
        f"All snapshot csi creation results are {csi_creation_times}"
    )
    log.info(f"The average csi snapshot creation time is : {avg_csi_c_time}")

    log.info(
        f"Number of Files on the volume : {total_files:,}, "
        f"Total dataset : {t_dateset} GiB"
    )

    self.full_results.add_key("avg_snapshot_creation_time_insecs", avg_c_time)
    self.full_results.all_results["total_files"] = total_files
    self.full_results.all_results["total_dataset"] = t_dateset
    self.full_results.all_results["creation_time"] = creation_times
    self.full_results.all_results["csi_creation_time"] = csi_creation_times

    # Write the test results into the ES server
    log.info("writing results to elastic search server")
    if self.full_results.es_write():
        res_link = self.full_results.results_link()
        # write the ES link to the test results in the test log.
        log.info(f"The result can be found at : {res_link}")

        # Create text file with results of all subtest
        self.write_result_to_file(res_link)
def test_fio_workload_simple(self, ripsaw, es, interface, io_pattern):
    """
    This is a basic fio perf test

    Deploys the ripsaw operator, runs the FIO benchmark CR with a data set
    of 40% of the ceph capacity, waits for the fio client pod to complete
    and then analyzes the results and pushes them to elastic-search and
    codespeed.

    Args:
        ripsaw: ripsaw (benchmark operator) fixture
        es: internal elastic-search fixture (may be falsy when not deployed)
        interface (str): 'CephBlockPool' selects the block-pool storage
            class, any other value selects the filesystem storage class
        io_pattern (str): when 'sequential', the jobs are set to
            write / read with io depth of 1

    """
    # Deployment ripsaw
    log.info("Deploying ripsaw operator")
    ripsaw.apply_crd('resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml')
    if interface == 'CephBlockPool':
        sc = constants.CEPHBLOCKPOOL_SC
    else:
        sc = constants.CEPHFILESYSTEM_SC

    # Create fio benchmark
    log.info("Create resource file for fio workload")
    fio_cr = templating.load_yaml(constants.FIO_CR_YAML)
    fio_args = fio_cr['spec']['workload']['args']

    # Saving the Original elastic-search IP and PORT - if defined in yaml.
    # backup_es is initialised up front so restoring it later cannot fail
    # with an unbound name when the CR has no elasticsearch section.
    backup_es = None
    if 'elasticsearch' in fio_cr['spec']:
        backup_es = fio_cr['spec']['elasticsearch']
    else:
        log.warning('Elastic Search information does not exists in YAML file')
        fio_cr['spec']['elasticsearch'] = {}

    # Use the internal define elastic-search server in the test - if exist
    if es:
        fio_cr['spec']['elasticsearch'] = {
            'server': es.get_ip(),
            'port': es.get_port(),
        }

    # Setting the data set to 40% of the total storage capacity
    ceph_cluster = CephCluster()
    ceph_capacity = ceph_cluster.get_ceph_capacity()
    total_data_set = int(ceph_capacity * 0.4)
    filesize = int(fio_args['filesize'].replace('GiB', ''))
    # To make sure the number of App pods will not be more then 50, in case
    # of large data set, changing the size of the file each pod will work on
    if total_data_set > 500:
        filesize = int(ceph_capacity * 0.008)
        fio_args['filesize'] = f'{filesize}GiB'
        # make sure that the storage size is larger then the file size
        fio_args['storagesize'] = f'{int(filesize * 1.2)}Gi'
    fio_args['servers'] = int(total_data_set / filesize)
    log.info(f'Total Data set to work on is : {total_data_set} GiB')

    environment = get_environment_info()
    if environment['user'] != '':
        fio_cr['spec']['test_user'] = environment['user']
    fio_cr['spec']['clustername'] = environment['clustername']

    log.debug(f'Environment information is : {environment}')

    fio_args['storageclass'] = sc
    if io_pattern == 'sequential':
        fio_args['jobs'] = ['write', 'read']
        fio_args['iodepth'] = 1
    log.info(f'The FIO CR file is {fio_cr}')
    fio_cr_obj = OCS(**fio_cr)
    fio_cr_obj.create()

    # Wait for fio client pod to be created
    for fio_pod in TimeoutSampler(
        300, 20, get_pod_name_by_pattern, 'fio-client', constants.RIPSAW_NAMESPACE
    ):
        try:
            if fio_pod[0] is not None:
                fio_client_pod = fio_pod[0]
                break
        except IndexError:
            log.info("Bench pod not ready yet")

    # Getting the start time of the test
    start_time = time.strftime('%Y-%m-%dT%H:%M:%SGMT', time.gmtime())

    # Getting the UUID from inside the benchmark pod
    uuid = ripsaw.get_uuid(fio_client_pod)
    # Setting back the original elastic-search information (only when the
    # CR file actually had one)
    if backup_es is not None:
        fio_cr['spec']['elasticsearch'] = backup_es

    full_results = FIOResultsAnalyse(uuid, fio_cr)

    # Initialize the results doc file.
    for key in environment:
        full_results.add_key(key, environment[key])

    # Setting the global parameters of the test
    full_results.add_key('io_pattern', io_pattern)
    full_results.add_key('dataset', f'{total_data_set}GiB')
    full_results.add_key('file_size', fio_args['filesize'])
    full_results.add_key('servers', fio_args['servers'])
    full_results.add_key('samples', fio_args['samples'])
    full_results.add_key('operations', fio_args['jobs'])
    full_results.add_key('block_sizes', fio_args['bs'])
    full_results.add_key('io_depth', fio_args['iodepth'])
    full_results.add_key('jobs', fio_args['numjobs'])
    full_results.add_key(
        'runtime',
        {
            'read': fio_args['read_runtime'],
            'write': fio_args['write_runtime'],
        },
    )
    full_results.add_key('storageclass', fio_args['storageclass'])
    full_results.add_key('vol_size', fio_args['storagesize'])

    # Wait for fio pod to initialized and complete
    log.info("Waiting for fio_client to complete")
    pod_obj = OCP(kind='pod')
    pod_obj.wait_for_resource(
        condition='Completed',
        resource_name=fio_client_pod,
        timeout=18000,
        sleep=300,
    )

    # Getting the end time of the test
    end_time = time.strftime('%Y-%m-%dT%H:%M:%SGMT', time.gmtime())
    full_results.add_key('test_time', {'start': start_time, 'end': end_time})

    output = run_cmd(f'oc logs {fio_client_pod}')
    log.info(f'The Test log is : {output}')

    # A substring test cannot raise IOError, so the failure is reported from
    # an explicit else branch instead of an exception handler.
    if 'Fio failed to execute' not in output:
        log.info("FIO has completed successfully")
    else:
        log.error("FIO failed to complete")

    # Clean up fio benchmark
    log.info("Deleting FIO benchmark")
    fio_cr_obj.delete()

    log.debug(f'Full results is : {full_results.results}')

    # if Internal ES is exists, Copy all data from the Internal to main ES
    if es:
        log.info('Copy all data from Internal ES to Main ES')
        es._copy(full_results.es)
    # Adding this sleep between the copy and the analyzing of the results
    # since sometimes the results of the read (just after write) are empty
    time.sleep(30)
    full_results.analyze_results()  # Analyze the results
    # Writing the analyzed test results to the Elastic-Search server
    full_results.es_write()
    full_results.codespeed_push()  # Push results to codespeed
    # Creating full link to the results on the ES server
    log.info(f'The Result can be found at ; {full_results.results_link()}')
def test_pvc_snapshot_performance_multiple_files(
    self, ripsaw, file_size, files, threads, interface
):
    """
    Run SmallFile Workload and the take snapshot.
    test will run with 1M, 2M and 4M of file on the volume - total data set
    is the same for all tests, ~30GiB, and then take snapshot and measure
    the time it takes.
    the test will run 3 time to check consistency.

    Args:
        ripsaw : benchmark operator fixture which will run the workload
        file_size (int): the size of the file to be create - in KiB
        files (int): number of files each thread will create
        threads (int): number of threads will be used in the workload
        interface (str): the volume interface that will be used
                         CephBlockPool / CephFileSystem

    Raises:
        TimeoutError : in case of creation files take too long time
                       more then 2 Hours
        AssertionError : if the benchmark pod does not reach Running state
                         or no PVC is found on the benchmark pod

    """
    # Loading the main template yaml file for the benchmark and update some
    # fields with new values
    sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

    if interface == constants.CEPHBLOCKPOOL:
        storageclass = constants.DEFAULT_STORAGECLASS_RBD
    else:
        storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
    log.info(f"Using {storageclass} Storageclass")

    # Setting up the parameters for this test
    workload_args = sf_data["spec"]["workload"]["args"]
    workload_args["samples"] = 1
    workload_args["operation"] = ["create"]
    workload_args["file_size"] = file_size
    workload_args["files"] = files
    workload_args["threads"] = threads
    workload_args["storageclass"] = storageclass
    # The elasticsearch section is removed from the CR; pop() tolerates a
    # template that has no such section.
    sf_data["spec"].pop("elasticsearch", None)

    # The volume is sized at three times the total data written, with a
    # floor of 100Gi. file_size is in KB and the volume size is in GB,
    # hence the conversion.
    total_files = int(files * threads)
    total_data = int(files * threads * file_size / constants.GB2KB)
    data_set = int(total_data * 3)  # calculate data with replica
    vol_size = data_set if data_set >= 100 else 100
    workload_args["storagesize"] = f"{vol_size}Gi"

    environment = get_environment_info()
    if environment["user"] != "":
        sf_data["spec"]["test_user"] = environment["user"]
    else:
        # since full results object need this parameter, initialize it from CR file
        environment["user"] = sf_data["spec"]["test_user"]
    sf_data["spec"]["clustername"] = environment["clustername"]
    log.debug(f"The smallfile yaml file is {sf_data}")

    # Deploy the ripsaw operator
    log.info("Apply Operator CRD")
    ripsaw.apply_crd("resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")

    all_results = []

    for test_num in range(self.tests_numbers):

        # deploy the smallfile workload
        log.info("Running SmallFile bench")
        sf_obj = OCS(**sf_data)
        sf_obj.create()

        # wait for benchmark pods to get created - takes a while
        for pod_names in TimeoutSampler(
            240,
            10,
            get_pod_name_by_pattern,
            "smallfile-client",
            constants.RIPSAW_NAMESPACE,
        ):
            try:
                if pod_names[0] is not None:
                    small_file_client_pod = pod_names[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        bench_pod = OCP(kind="pod", namespace=constants.RIPSAW_NAMESPACE)
        log.info("Waiting for SmallFile benchmark to Run")
        assert bench_pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_name=small_file_client_pod,
            sleep=30,
            timeout=600,
        )

        # Find the PVC used by the benchmark client pod. The pod is matched
        # by name rather than by its position in the namespace pod list.
        pvc_name = ""
        for item in bench_pod.get()["items"]:
            if item.get("metadata").get("name") != small_file_client_pod:
                continue
            for volume in item.get("spec").get("volumes"):
                if "persistentVolumeClaim" in volume:
                    pvc_name = volume["persistentVolumeClaim"]["claimName"]
                    break
            break
        assert pvc_name, (
            f"No PVC was found on the benchmark pod {small_file_client_pod}"
        )
        log.info(f"Benchmark PVC name is : {pvc_name}")

        # Creation of 4M files on CephFS can take a lot of time
        timeout = 7200
        while timeout >= 0:
            logs = bench_pod.get_logs(name=small_file_client_pod)
            if "RUN STATUS DONE" in logs:
                break
            timeout -= 30
            if timeout == 0:
                raise TimeoutError("Timed out waiting for benchmark to complete")
            time.sleep(30)
        log.info(f"Smallfile test ({test_num + 1}) finished.")

        snap_name = pvc_name.replace("claim", "snapshot-")
        log.info(f"Taking snapshot of the PVC {pvc_name}")
        log.info(f"Snapshot name : {snap_name}")
        creation_time = self.measure_create_snapshot_time(
            pvc_name=pvc_name, snap_name=snap_name, interface=interface
        )
        log.info(f"Snapshot creation time is {creation_time} seconds")
        all_results.append(creation_time)

        # Delete the smallfile workload
        log.info("Deleting the smallfile workload")
        if sf_obj.delete(wait=True):
            log.info("The smallfile workload was deleted successfully")

        # Delete VolumeSnapshots
        log.info("Deleting the snapshots")
        if self.snap_obj.delete(wait=True):
            log.info("The snapshot deleted successfully")
        log.info("Verify (and wait if needed) that ceph health is OK")
        ceph_health_check(tries=45, delay=60)

    log.info(f"Full test report for {interface}:")
    log.info(
        f"Test ran {self.tests_numbers} times, " f"All results are {all_results}"
    )
    log.info(f"The average creation time is : {statistics.mean(all_results)}")
    # data_set includes the x3 factor; report the data set without it
    log.info(
        f"Number of Files on the volume : {total_files:,}, "
        f"Total dataset : {int(data_set / 3)} GiB"
    )
def test_fio_workload_simple(self, ripsaw, es, interface, io_pattern):
    """
    This is a basic fio perf test

    Deploys the ripsaw operator, runs the FIO benchmark CR with a data set
    of 40% of the ceph capacity, waits for the fio client pod to complete
    and then analyzes the results and pushes them to elastic-search and
    codespeed.

    Args:
        ripsaw: ripsaw (benchmark operator) fixture
        es: internal elastic-search fixture (may be falsy when not deployed)
        interface (str): "CephBlockPool" selects the block-pool storage
            class, any other value selects the filesystem storage class
        io_pattern (str): when "sequential", the jobs are set to
            write / read with io depth of 1

    """
    # Deployment ripsaw
    log.info("Deploying ripsaw operator")
    ripsaw.apply_crd("resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")
    if interface == "CephBlockPool":
        sc = constants.CEPHBLOCKPOOL_SC
    else:
        sc = constants.CEPHFILESYSTEM_SC

    # Create fio benchmark
    log.info("Create resource file for fio workload")
    fio_cr = templating.load_yaml(constants.FIO_CR_YAML)
    fio_args = fio_cr["spec"]["workload"]["args"]

    # Saving the Original elastic-search IP and PORT - if defined in yaml.
    # backup_es is initialised up front so restoring it later cannot fail
    # with an unbound name when the CR has no elasticsearch section.
    backup_es = None
    if "elasticsearch" in fio_cr["spec"]:
        backup_es = fio_cr["spec"]["elasticsearch"]
    else:
        log.warning("Elastic Search information does not exists in YAML file")
        fio_cr["spec"]["elasticsearch"] = {}

    # Use the internal define elastic-search server in the test - if exist
    if es:
        fio_cr["spec"]["elasticsearch"] = {
            "server": es.get_ip(),
            "port": es.get_port(),
        }

    # Setting the data set to 40% of the total storage capacity
    ceph_cluster = CephCluster()
    ceph_capacity = ceph_cluster.get_ceph_capacity()
    total_data_set = int(ceph_capacity * 0.4)
    filesize = int(fio_args["filesize"].replace("GiB", ""))
    # To make sure the number of App pods will not be more then 50, in case
    # of large data set, changing the size of the file each pod will work on
    if total_data_set > 500:
        filesize = int(ceph_capacity * 0.008)
        fio_args["filesize"] = f"{filesize}GiB"
        # make sure that the storage size is larger then the file size
        fio_args["storagesize"] = f"{int(filesize * 1.2)}Gi"
    fio_args["servers"] = int(total_data_set / filesize)
    log.info(f"Total Data set to work on is : {total_data_set} GiB")

    environment = get_environment_info()
    if environment["user"] != "":
        fio_cr["spec"]["test_user"] = environment["user"]
    fio_cr["spec"]["clustername"] = environment["clustername"]

    log.debug(f"Environment information is : {environment}")

    fio_args["storageclass"] = sc
    if io_pattern == "sequential":
        fio_args["jobs"] = ["write", "read"]
        fio_args["iodepth"] = 1
    log.info(f"The FIO CR file is {fio_cr}")
    fio_cr_obj = OCS(**fio_cr)
    fio_cr_obj.create()

    # Wait for fio client pod to be created
    for fio_pod in TimeoutSampler(
        300, 20, get_pod_name_by_pattern, "fio-client", constants.RIPSAW_NAMESPACE
    ):
        try:
            if fio_pod[0] is not None:
                fio_client_pod = fio_pod[0]
                break
        except IndexError:
            log.info("Bench pod not ready yet")

    # Getting the start time of the test
    start_time = time.strftime("%Y-%m-%dT%H:%M:%SGMT", time.gmtime())

    # Getting the UUID from inside the benchmark pod
    uuid = ripsaw.get_uuid(fio_client_pod)
    # Setting back the original elastic-search information (only when the
    # CR file actually had one)
    if backup_es is not None:
        fio_cr["spec"]["elasticsearch"] = backup_es

    full_results = FIOResultsAnalyse(uuid, fio_cr)

    # Initialize the results doc file.
    for key in environment:
        full_results.add_key(key, environment[key])

    # Setting the global parameters of the test
    full_results.add_key("io_pattern", io_pattern)
    full_results.add_key("dataset", f"{total_data_set}GiB")
    full_results.add_key("file_size", fio_args["filesize"])
    full_results.add_key("servers", fio_args["servers"])
    full_results.add_key("samples", fio_args["samples"])
    full_results.add_key("operations", fio_args["jobs"])
    full_results.add_key("block_sizes", fio_args["bs"])
    full_results.add_key("io_depth", fio_args["iodepth"])
    full_results.add_key("jobs", fio_args["numjobs"])
    full_results.add_key(
        "runtime",
        {
            "read": fio_args["read_runtime"],
            "write": fio_args["write_runtime"],
        },
    )
    full_results.add_key("storageclass", fio_args["storageclass"])
    full_results.add_key("vol_size", fio_args["storagesize"])

    # Wait for fio pod to initialized and complete
    log.info("Waiting for fio_client to complete")
    pod_obj = OCP(kind="pod")
    pod_obj.wait_for_resource(
        condition="Completed",
        resource_name=fio_client_pod,
        timeout=18000,
        sleep=300,
    )

    # Getting the end time of the test
    end_time = time.strftime("%Y-%m-%dT%H:%M:%SGMT", time.gmtime())
    full_results.add_key("test_time", {"start": start_time, "end": end_time})

    output = run_cmd(f"oc logs {fio_client_pod}")
    log.info(f"The Test log is : {output}")

    # A substring test cannot raise IOError, so the failure is reported from
    # an explicit else branch instead of an exception handler.
    if "Fio failed to execute" not in output:
        log.info("FIO has completed successfully")
    else:
        log.error("FIO failed to complete")

    # Clean up fio benchmark
    log.info("Deleting FIO benchmark")
    fio_cr_obj.delete()

    log.debug(f"Full results is : {full_results.results}")

    # if Internal ES is exists, Copy all data from the Internal to main ES
    if es:
        log.info("Copy all data from Internal ES to Main ES")
        es._copy(full_results.es)
    # Adding this sleep between the copy and the analyzing of the results
    # since sometimes the results of the read (just after write) are empty
    time.sleep(30)
    full_results.analyze_results()  # Analyze the results
    # Writing the analyzed test results to the Elastic-Search server
    full_results.es_write()
    full_results.codespeed_push()  # Push results to codespeed
    # Creating full link to the results on the ES server
    log.info(f"The Result can be found at ; {full_results.results_link()}")
def test_smallfile_workload(
    self, ripsaw, es, file_size, files, threads, samples, interface
):
    """
    Run SmallFile Workload

    Args:
        ripsaw: benchmark-operator helper; used here to apply the operator
            CRD and to read the benchmark UUID from the client pod
        es: the internal (in-cluster) elastic-search object, or a false
            value when no internal server is used
        file_size (int): the size of each file - in KB
        files (int): number of files (multiplied by ``threads`` when the
            data set size is calculated)
        threads (int): number of threads used by the workload
        samples (int): number of samples the workload will run
        interface (str): ``constants.CEPHBLOCKPOOL`` selects the RBD
            storageclass, any other value selects the CephFS storageclass

    Raises:
        TimeoutError: if the benchmark log does not report
            "RUN STATUS DONE" within 3600 seconds
        AssertionError: if the client pod does not reach Running state,
            if errors were found in the logs, or if the results check failed

    """

    # Loading the main template yaml file for the benchmark
    sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

    # Saving the Original elastic-search IP and PORT - if defined in yaml.
    # backup_es must always be bound: it is tested further down, also when
    # the template has no elasticsearch section.
    backup_es = None
    if "elasticsearch" in sf_data["spec"]:
        yaml_es = sf_data["spec"]["elasticsearch"]
        yaml_es["url"] = f"http://{yaml_es['server']}:{yaml_es['port']}"
        backup_es = yaml_es
    else:
        log.warning("Elastic Search information does not exists in YAML file")
        sf_data["spec"]["elasticsearch"] = {}

    # Use the internal define elastic-search server in the test - if exist
    if es:
        sf_data["spec"]["elasticsearch"] = {
            "url": f"http://{es.get_ip()}:{es.get_port()}",
            "server": es.get_ip(),
            "port": es.get_port(),
        }

    log.info("Apply Operator CRD")
    ripsaw.apply_crd("resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")

    if interface == constants.CEPHBLOCKPOOL:
        storageclass = constants.DEFAULT_STORAGECLASS_RBD
    else:
        storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
    log.info(f"Using {storageclass} Storageclass")

    wl_args = sf_data["spec"]["workload"]["args"]
    wl_args["storageclass"] = storageclass
    log.info("Running SmallFile bench")

    # Setting up the parameters for this test
    wl_args["file_size"] = file_size
    wl_args["files"] = files
    wl_args["threads"] = threads
    wl_args["samples"] = samples

    # Calculating the size of the volume that need to be test, it is 3 times
    # the size of the files, and at least 100Gi.
    # Since the file_size is in Kb and the vol_size need to be in Gb, more
    # calculation is needed.
    vol_size = int(files * threads * file_size * 3)
    vol_size = max(100, int(vol_size / constants.GB2KB))
    wl_args["storagesize"] = f"{vol_size}Gi"

    environment = get_environment_info()
    if environment["user"] != "":
        sf_data["spec"]["test_user"] = environment["user"]
    else:
        # since full results object need this parameter, initialize it from CR file
        environment["user"] = sf_data["spec"]["test_user"]

    sf_data["spec"]["clustername"] = environment["clustername"]

    sf_obj = OCS(**sf_data)
    sf_obj.create()
    log.info(f"The smallfile yaml file is {sf_data}")

    # wait for benchmark pods to get created - takes a while
    for pod_names in TimeoutSampler(
        240,
        10,
        get_pod_name_by_pattern,
        "smallfile-client",
        constants.RIPSAW_NAMESPACE,
    ):
        try:
            if pod_names[0] is not None:
                small_file_client_pod = pod_names[0]
                break
        except IndexError:
            # an empty list means that no matching pod exists yet
            log.info("Bench pod not ready yet")

    pod_obj = OCP(kind="pod", namespace=constants.RIPSAW_NAMESPACE)
    log.info("Waiting for SmallFile benchmark to Run")
    assert pod_obj.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        resource_name=small_file_client_pod,
        sleep=30,
        timeout=600,
    )

    # Getting the start time of the test
    start_time = time.strftime("%Y-%m-%dT%H:%M:%SGMT", time.gmtime())
    test_start_time = time.time()

    # After testing manually, changing the timeout
    timeout = 3600

    # Getting the UUID from inside the benchmark pod
    uuid = ripsaw.get_uuid(small_file_client_pod)

    # Setting back the original elastic-search information
    if backup_es:
        sf_data["spec"]["elasticsearch"] = backup_es

    full_results = SmallFileResultsAnalyse(uuid, sf_data)

    # Initialize the results doc file.
    for key in environment:
        full_results.add_key(key, environment[key])

    # Calculating the total size of the working data set - in GB
    full_results.add_key(
        "dataset",
        file_size
        * files
        * threads
        * full_results.results["clients"]
        / constants.GB2KB,
    )

    full_results.add_key(
        "global_options",
        {
            "files": files,
            "file_size": file_size,
            "storageclass": sf_data["spec"]["workload"]["args"]["storageclass"],
            "vol_size": sf_data["spec"]["workload"]["args"]["storagesize"],
        },
    )

    # Polling the client pod log until the benchmark reports completion
    while True:
        logs = pod_obj.exec_oc_cmd(
            f"logs {small_file_client_pod}", out_yaml_format=False
        )
        if "RUN STATUS DONE" in logs:
            # Getting the end time of the test
            end_time = time.strftime("%Y-%m-%dT%H:%M:%SGMT", time.gmtime())
            full_results.add_key(
                "test_time", {"start": start_time, "end": end_time}
            )

            # if Internal ES is exists, Copy all data from the Internal to main ES
            if es:
                log.info("Copy all data from Internal ES to Main ES")
                es._copy(full_results.es)

            full_results.read()
            if not full_results.dont_check:
                full_results.add_key("hosts", full_results.get_clients_list())
                full_results.init_full_results()
                full_results.aggregate_host_results()
                test_status = full_results.aggregate_samples_results()
                full_results.es_write()

                # Creating full link to the results on the ES server
                log.info(
                    f"The Result can be found at ; {full_results.results_link()}"
                )
            else:
                test_status = True

            break

        if timeout < (time.time() - test_start_time):
            raise TimeoutError("Timed out waiting for benchmark to complete")
        time.sleep(30)

    assert not get_logs_with_errors() and test_status, "Test Failed"
def test_pvc_snapshot_performance_multiple_files(
    self, file_size, files, threads, interface
):
    """
    Run SmallFile Workload and then take snapshot.

    The SmallFile workload fills a volume with files, then a snapshot of the
    volume is taken and its creation time (overall and CSI) is measured.
    This is repeated ``self.tests_numbers`` times, and the results are
    written to the elastic-search server and to the results file.

    The test returns without running anything when no elastic-search server
    is available, since the SmallFile workload requires one.

    Args:
        file_size (int): the size of the file to be create - in KiB
        files (int): number of files each thread will create
        threads (int): number of threads will be used in the workload
        interface (str): the volume interface that will be used
                         CephBlockPool / CephFileSystem

    Raises:
        TimeoutError : in case of creation files take too long time
                       (NOTE(review): presumably raised by the workload
                       wait helpers - it is not raised in this function)

    """

    # Loading the main template yaml file for the benchmark and update some
    # fields with new values
    sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

    # Deploying elastic-search server in the cluster for use by the
    # SmallFiles workload, since it is mandatory for the workload.
    # This is deployed once for all test iterations and will be deleted
    # in the end of the test.
    if config.PERF.get("deploy_internal_es"):
        self.es = ElasticSearch()
    else:
        es_server = config.PERF.get("internal_es_server")
        # A missing key (None) is handled the same way as an empty string
        if not es_server:
            self.es = None
            log.error(
                "ElasticSearch server is not configured ! The test cannot run"
            )
            return

        es_port = config.PERF.get("internal_es_port")
        self.es = {
            "server": es_server,
            "port": es_port,
            "url": f"http://{es_server}:{es_port}",
        }
        # verify that the connection to the elasticsearch server is OK
        if not super(TestPvcSnapshotPerformance, self).es_connect():
            self.es = None
            log.error("ElasticSearch doesn't exist ! The test cannot run")
            return

    # From this point an internally deployed elastic-search instance exists,
    # so it is always cleaned up - also when the test fails in the middle.
    try:
        if isinstance(self.es, ElasticSearch):
            es_url = f"http://{self.es.get_ip()}:{self.es.get_port()}"
        else:
            es_url = self.es["url"]
        sf_data["spec"]["elasticsearch"] = {"url": es_url}

        if interface == constants.CEPHBLOCKPOOL:
            storageclass = constants.DEFAULT_STORAGECLASS_RBD
        else:
            storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
        log.info(f"Using {storageclass} Storageclass")

        # Setting up the parameters for this test
        wl_args = sf_data["spec"]["workload"]["args"]
        wl_args["samples"] = 1
        wl_args["operation"] = ["create"]
        wl_args["file_size"] = file_size
        wl_args["files"] = files
        wl_args["threads"] = threads
        wl_args["storageclass"] = storageclass

        # Calculating the size of the volume that need to be test, it is
        # 3 times the size of the files, and at least 100Gi.
        # Since the file_size is in Kb and the vol_size need to be in Gb,
        # more calculation is needed.
        total_files = int(files * threads)
        total_data = int(files * threads * file_size / constants.GB2KB)
        data_set = int(total_data * 3)  # calculate data with replica
        vol_size = data_set if data_set >= 100 else 100
        wl_args["storagesize"] = f"{vol_size}Gi"

        environment = get_environment_info()
        if environment["user"] != "":
            sf_data["spec"]["test_user"] = environment["user"]
        else:
            # since full results object need this parameter, initialize it
            # from CR file
            environment["user"] = sf_data["spec"]["test_user"]

        sf_data["spec"]["clustername"] = environment["clustername"]
        log.debug(f"The smallfile yaml file is {sf_data}")

        # Deploy the benchmark-operator, so we can use the SmallFiles workload
        # to fill up the volume with files, and switch to the
        # benchmark-operator namespace.
        log.info("Deploy the benchmark-operator")
        self.deploy_benchmark_operator()
        switch_to_project(BMO_NAME)

        all_results = []

        # Produce ES report
        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        self.full_results = self.init_full_results(
            ResultsAnalyse(
                self.uuid,
                self.crd_data,
                self.full_log_path,
                "pvc_snapshot_perf_multiple_files",
            )
        )

        self.full_results.add_key("file_size_inKB", file_size)
        self.full_results.add_key("threads", threads)
        self.full_results.add_key("interface", interface)

        for test_num in range(self.tests_numbers):

            test_results = {"creation_time": None, "csi_creation_time": None}

            # deploy the smallfile workload
            self.crd_data = sf_data
            self.client_pod_name = "smallfile-client"
            self.deploy_and_wait_for_wl_to_start(timeout=240)

            # The benchmark PVC is taken as the first PVC in the namespace
            pvc_items = OCP(kind="pvc", namespace=BMO_NAME).get().get("items")
            pvc_name = pvc_items[0].get("metadata").get("name")
            log.info(f"Benchmark PVC name is : {pvc_name}")
            self.wait_for_wl_to_finish(sleep=30)

            # Taking snapshot of the PVC (which contain files)
            snap_name = pvc_name.replace("claim", "snapshot-")
            log.info(f"Taking snapshot of the PVC {pvc_name}")
            log.info(f"Snapshot name : {snap_name}")

            start_time = self.get_time("csi")

            test_results["creation_time"] = self.measure_create_snapshot_time(
                pvc_name=pvc_name,
                snap_name=snap_name,
                namespace=BMO_NAME,
                interface=interface,
                start_time=start_time,
            )
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} creation time is"
                f' {test_results["creation_time"]} seconds'
            )

            test_results[
                "csi_creation_time"
            ] = performance_lib.measure_csi_snapshot_creation_time(
                interface=interface,
                snapshot_id=self.snap_uid,
                start_time=start_time,
            )
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} csi creation time is"
                f' {test_results["csi_creation_time"]} seconds'
            )

            all_results.append(test_results)

            # Delete the smallfile workload - which will delete also the PVC
            log.info("Deleting the smallfile workload")
            if self.benchmark_obj.delete(wait=True):
                log.info("The smallfile workload was deleted successfully")

            # Delete VolumeSnapshots
            log.info("Deleting the snapshots")
            if self.snap_obj.delete(wait=True):
                log.info("The snapshot deleted successfully")

            log.info("Verify (and wait if needed) that ceph health is OK")
            ceph_health_check(tries=45, delay=60)

            # Sleep for 1 Min. between test samples
            time.sleep(60)
    finally:
        # Cleanup the elasticsearch instance, if needed.
        if isinstance(self.es, ElasticSearch):
            log.info("Deleting the elastic-search instance")
            self.es.cleanup()

    creation_times = [t["creation_time"] for t in all_results]
    avg_c_time = statistics.mean(creation_times)
    csi_creation_times = [t["csi_creation_time"] for t in all_results]
    avg_csi_c_time = statistics.mean(csi_creation_times)

    # The reported dataset is the data size without the replica factor
    t_dataset = total_data

    log.info(f"Full test report for {interface}:")
    log.info(
        f"Test ran {self.tests_numbers} times, "
        f"All snapshot creation results are {creation_times} seconds"
    )
    log.info(f"The average snapshot creation time is : {avg_c_time} seconds")
    log.info(
        f"Test ran {self.tests_numbers} times, "
        f"All snapshot csi creation results are {csi_creation_times}"
    )
    log.info(f"The average csi snapshot creation time is : {avg_csi_c_time}")

    log.info(
        f"Number of Files on the volume : {total_files:,}, "
        f"Total dataset : {t_dataset} GiB"
    )

    self.full_results.add_key("avg_snapshot_creation_time_insecs", avg_c_time)
    self.full_results.all_results["total_files"] = total_files
    self.full_results.all_results["total_dataset"] = t_dataset
    self.full_results.all_results["creation_time"] = creation_times
    self.full_results.all_results["csi_creation_time"] = csi_creation_times

    # Write the test results into the ES server
    log.info("writing results to elastic search server")
    self.results_path = helpers.get_full_test_logs_path(cname=self)
    if self.full_results.es_write():
        res_link = self.full_results.results_link()
        # write the ES link to the test results in the test log.
        log.info(f"The result can be found at : {res_link}")

        # Create text file with results of all subtest
        self.write_result_to_file(res_link)