Example #1
class TestSmallFileWorkload(PASTest):
    """
    Deploy the benchmark operator and run the SmallFile workload.
    The SmallFile workload uses https://github.com/distributed-system-analysis/smallfile
    smallfile is a Python-based distributed POSIX workload generator which can
    be used to quickly measure performance for a variety of metadata-intensive
    workloads.
    """
    def setup(self):
        """
        Setting up test parameters
        """
        log.info("Starting the test setup")
        self.benchmark_name = "SmallFiles"
        self.client_pod_name = "smallfile-client"
        if config.PERF.get("deploy_internal_es"):
            self.es = ElasticSearch()
        else:
            if config.PERF.get("internal_es_server") == "":
                self.es = None
                return
            else:
                self.es = {
                    "server": config.PERF.get("internal_es_server"),
                    "port": config.PERF.get("internal_es_port"),
                    "url": f"http://{config.PERF.get('internal_es_server')}:"
                           f"{config.PERF.get('internal_es_port')}",
                }
                # verify that the connection to the elasticsearch server is OK
                if not super(TestSmallFileWorkload, self).es_connect():
                    self.es = None
                    return

        super(TestSmallFileWorkload, self).setup()
        # deploy the benchmark-operator
        self.deploy_benchmark_operator()

    def setting_storage_usage(self, file_size, files, threads, samples,
                              clients):
        """
        Get the storage capacity, calculate the usage of the storage and
        set the workload CR file parameters.

        Args:
            file_size (int) : the size of the file to be used
            files (int) : number of files to use
            threads (int) : number of threads to use in the test
            samples (int) : how many samples to run for each test
            clients (int) : number of clients (pods) to use in the test

        """
        self.crd_data["spec"]["workload"]["args"]["file_size"] = file_size
        self.crd_data["spec"]["workload"]["args"]["files"] = files
        self.crd_data["spec"]["workload"]["args"]["threads"] = threads
        self.crd_data["spec"]["workload"]["args"]["samples"] = samples
        self.crd_data["spec"]["workload"]["args"]["clients"] = clients

        # Calculating the size of the volume that needs to be tested. It is
        # set to three times the total size of the files (to ensure ample
        # headroom), and to at least 100Gi.
        # Since the file_size is in KB and the vol_size needs to be in GB,
        # a unit conversion is needed.
        vol_size = int(files * threads * file_size * 3)
        vol_size = int(vol_size / constants.GB2KB)
        if vol_size < 100:
            vol_size = 100
        self.crd_data["spec"]["workload"]["args"][
            "storagesize"] = f"{vol_size}Gi"

    def init_full_results(self, full_results):
        """
        Initialize the full results object which will be sent to the ES server

        Args:
            full_results (obj): an empty SmallFileResultsAnalyse object

        Returns:
            SmallFileResultsAnalyse (obj): the input object filled with data

        """
        for key in self.environment:
            full_results.add_key(key, self.environment[key])

        # Calculating the total size of the working data set - in GB
        full_results.add_key(
            "dataset",
            self.crd_data["spec"]["workload"]["args"]["file_size"] *
            self.crd_data["spec"]["workload"]["args"]["files"] *
            self.crd_data["spec"]["workload"]["args"]["threads"] *
            full_results.results["clients"] / constants.GB2KB,
        )
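        # For example (assuming constants.GB2KB ~= 1e6): with file_size=4,
        # files=5000, threads=22 and clients=33, the data set is
        # 4 * 5000 * 22 * 33 = 14,520,000 KB, i.e. roughly 14 GB.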

        full_results.add_key(
            "global_options",
            {
                "files": self.crd_data["spec"]["workload"]["args"]["files"],
                "file_size": self.crd_data["spec"]["workload"]["args"]["file_size"],
                "storageclass": self.crd_data["spec"]["workload"]["args"]["storageclass"],
                "vol_size": self.crd_data["spec"]["workload"]["args"]["storagesize"],
            },
        )
        return full_results

    def generate_kibana_link(self, index, columns):
        """
        Generate a full link to the Kibana server with the full test results
        information.

        Args:
            index (str): the kibana index name (results, response time, etc.)
            columns (str): comma-separated list of the columns to display

        Returns:
            str : an http link to the appropriate kibana report

        """

        stime = self.start_time.replace("GMT", ".000Z")
        etime = self.end_time.replace("GMT", ".000Z")
        log.info(
            json.dumps(self.crd_data.get("spec").get("elasticsearch"),
                       indent=2))
        host = self.crd_data.get("spec").get("elasticsearch").get("url")
        try:
            host = host.split(":")[1].replace("//", "")
        except Exception:
            log.error("No ES configuretion")
            return ""
        kibana_id = self.get_kibana_indexid(host, index)

        app = "app/kibana#/discover"
        if self.dev_mode:
            app = "app/discover#/"

        result = (
            f"http://{host}:5601/{app}"
            f"?_a=(columns:!({columns}),filters:!(),index:'{kibana_id}',interval:auto,"
            f"query:(language:kuery,query:'uuid:{self.uuid}'),sort:!())"
            f"&_g=(filters:!(),refreshInterval:(pause:!t,value:0),time:(from:'{stime}',to:'{etime}'))"
        )
        return result
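        # A sample rendered link (hypothetical host and uuid, shown for
        # illustration only):
        # http://10.0.0.1:5601/app/kibana#/discover?_a=(columns:!(optype,...),
        #     index:'...',query:(language:kuery,query:'uuid:1234-...'),...)
        #     &_g=(...,time:(from:'...',to:'...'))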

    def collect_benchmark_logs(self):
        """
        Collecting the test log from all benchmark pods
        """

        # Getting full list of benchmark clients
        self.full_client_list = get_pod_name_by_pattern(
            self.client_pod_name, benchmark_operator.BMO_NAME)

        # Collecting logs from each pod
        for clpod in self.full_client_list:
            test_logs = self.pod_obj.exec_oc_cmd(f"logs {clpod}",
                                                 out_yaml_format=False)
            log_file_name = f"{self.full_log_path}/{clpod}-pod.log"
            try:
                with open(log_file_name, "w") as f:
                    f.write(test_logs)
                log.info(f"The Test log can be found at : {log_file_name}")
            except Exception:
                log.warning(
                    f"Cannot write the log to the file {log_file_name}")
        log.info("Logs from all client pods got successfully")

    def run(self):
        log.info("Running SmallFile bench")
        self.deploy_and_wait_for_wl_to_start(timeout=240, sleep=10)

        # Getting the UUID from inside the benchmark pod
        self.uuid = self.operator.get_uuid(self.client_pod)
        self.wait_for_wl_to_finish(sleep=30)
        self.collect_benchmark_logs()
        if "RUN STATUS DONE" in self.test_logs:
            log.info("SmallFiles has completed successfully")
            return True
        log.warning("SmallFiles failed to complete")
        return False

    def teardown(self):
        """
        Tear down the test environment at the end of the test.

        """
        log.info("cleanup the environment")
        if isinstance(self.es, ElasticSearch):
            self.es.cleanup()
        self.operator.cleanup()
        # Wait up to 45 minutes for the ceph cluster to reach health OK after
        # the backend operations have completed.
        log.info("Verify (and wait if needed) that ceph health is OK")
        ceph_health_check(tries=45, delay=60)
        # Let the background operation (deleting backend images) finish
        time.sleep(120)

    @pytest.mark.parametrize(
        argnames=[
            "file_size", "files", "threads", "samples", "clients", "interface"
        ],
        argvalues=[
            pytest.param(*[4, 5000, 22, 5, 33, constants.CEPHBLOCKPOOL]),
            pytest.param(*[16, 5000, 8, 5, 21, constants.CEPHBLOCKPOOL]),
            pytest.param(*[4, 2500, 4, 5, 9, constants.CEPHFILESYSTEM]),
            pytest.param(*[16, 1500, 4, 5, 9, constants.CEPHFILESYSTEM]),
        ],
    )
    @pytest.mark.polarion_id("OCS-1295")
    def test_smallfile_workload(self, file_size, files, threads, samples,
                                clients, interface):
        """
        Run SmallFile Workload

        Args:
            file_size (int) : the size of the file to be used
            files (int) : number of files to use
            threads (int) : number of threads to use in the test
            samples (int) : how many samples to run for each test
            clients (int) : number of clients (pods) to use in the test
            interface (str) : the volume type (rbd / cephfs)

        """
        # verify that there is an elasticsearch server for the benchmark
        if not self.es:
            log.error("This test must have an Elasticsearch server")
            return False

        # Getting the full path for the test logs
        self.full_log_path = get_full_test_logs_path(cname=self)
        self.results_path = get_full_test_logs_path(cname=self)
        self.full_log_path += (
            f"-{file_size}-{files}-{threads}-{samples}-{clients}-{interface}")
        log.info(f"Logs file path name is : {self.full_log_path}")

        # Loading the main template yaml file for the benchmark
        log.info("Create resource file for small_files workload")
        self.crd_data = templating.load_yaml(
            constants.SMALLFILE_BENCHMARK_YAML)

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.es_info_backup(self.es)

        self.set_storageclass(interface=interface)

        # Setting the workload parameters and calculating the required volume size
        self.setting_storage_usage(file_size, files, threads, samples, clients)

        self.get_env_info()

        if not self.run():
            log.error("The benchmark failed to run !")
            return

        # Setting back the original elastic-search information
        if self.backup_es:
            self.crd_data["spec"]["elasticsearch"] = self.backup_es

        # Initialize the results doc file.
        full_results = self.init_full_results(
            SmallFileResultsAnalyse(self.uuid, self.crd_data,
                                    self.full_log_path, self.main_es))

        log.info(f"Full results is : {full_results.results}")
        if isinstance(self.es, ElasticSearch):
            # Using internal deployed elasticsearch
            log.info("Getting data from internal ES")
            if self.main_es:
                self.copy_es_data(self.es)
                full_results.read()
            else:
                log.info("Dumping data from the Internal ES to tar ball file")
                self.es.dumping_all_data(self.full_log_path)
        else:
            log.info(self.es)
            self.es = Elasticsearch(
                hosts=[{"host": self.es["server"], "port": self.es["port"]}]
            )
            full_results.read()

        full_results.add_key("test_time", {
            "start": self.start_time,
            "end": self.end_time
        })

        if self.main_es:
            full_results.es = self.main_es

        if not full_results.dont_check:
            full_results.add_key("hosts", full_results.get_clients_list())
            full_results.init_full_results()
            full_results.aggregate_host_results()
            test_status = full_results.aggregate_samples_results()

            # Generate a link to all of the test data in Kibana
            columns = "optype,files,filesPerSec,elapsed,sample,tid"
            klink = self.generate_kibana_link("ripsaw-smallfile-results",
                                              columns)

            # Generate a link to all of the response-time data in Kibana
            columns = "optype,sample,iops,max,min,mean,'90%25','95%25','99%25'"
            rtlink = self.generate_kibana_link("ripsaw-smallfile-rsptimes",
                                               columns)

            full_results.all_results = {
                "kibana_all": klink,
                "kibana_rsptime": rtlink
            }

            if full_results.es_write():
                res_link = full_results.results_link()
                log.info(f"The Result can be found at : {res_link}")

                # Write the result link to the text file that collects the
                # results of all subtests (4, according to the parameters)
                self.write_result_to_file(res_link)

        else:
            test_status = True

        assert test_status, "Test Failed !"

    def test_smallfile_results(self):
        """
        This is not a test as such - it only checks that the previous tests ran
        and finished as expected, and reports the full results (links in the ES)
        of the previous (4) tests
        """

        # TODO : This function will push the results (if exists) to the performance dashboard.

        self.results_path = get_full_test_logs_path(
            cname=self, fname="test_smallfile_workload")
        self.results_file = os.path.join(self.results_path, "all_results.txt")
        log.info(f"Check results in {self.results_file}")
        try:
            with open(self.results_file, "r") as input_file:
                data = input_file.read().split("\n")
            data.pop()  # remove the last empty element
            if len(data) != 4:
                log.error("Not all tests finished")
                raise exceptions.BenchmarkTestFailed()
            else:
                log.info(
                    "All tests finished OK, and the results can be found at :")
                for res in data:
                    log.info(res)
        except OSError as err:
            log.error(f"OS error: {err}")
            raise err
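
The CR fields that the test reads and writes imply a benchmark custom
resource of roughly the following shape (a minimal sketch reconstructed from
the accesses above; the real template lives in
constants.SMALLFILE_BENCHMARK_YAML and may contain more fields - all concrete
values here are illustrative placeholders):

crd_data = {
    "spec": {
        # Elasticsearch target; es_info_backup() saves and restores this
        "elasticsearch": {"url": "http://es.example.com:9200"},  # placeholder
        "workload": {
            "args": {
                "file_size": 4,        # KB, set by setting_storage_usage()
                "files": 5000,
                "threads": 22,
                "samples": 5,
                "clients": 33,
                "storageclass": "ocs-storagecluster-ceph-rbd",  # assumed name
                "storagesize": "100Gi",  # computed minimum, see above
            },
        },
    },
}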
Example #2
class TestSmallFileWorkload(PASTest):
    """
    Deploy the benchmark operator and run the SmallFile workload.
    The SmallFile workload uses https://github.com/distributed-system-analysis/smallfile
    smallfile is a Python-based distributed POSIX workload generator which can
    be used to quickly measure performance for a variety of metadata-intensive
    workloads.
    """
    def setup(self):
        """
        Setting up test parameters
        """
        log.info("Starting the test setup")
        self.benchmark_name = "SmallFiles"
        self.client_pod_name = "smallfile-client"
        if config.PERF.get("deploy_internal_es"):
            self.es = ElasticSearch()
        else:
            if config.PERF.get("internal_es_server") == "":
                self.es = None
                return
            else:
                self.es = {
                    "server": config.PERF.get("internal_es_server"),
                    "port": config.PERF.get("internal_es_port"),
                    "url": f"http://{config.PERF.get('internal_es_server')}:"
                           f"{config.PERF.get('internal_es_port')}",
                }
                # verify that the connection to the elasticsearch server is OK
                if not super(TestSmallFileWorkload, self).es_connect():
                    self.es = None
                    return

        super(TestSmallFileWorkload, self).setup()
        # deploy the benchmark-operator
        self.deploy_benchmark_operator()

    def setting_storage_usage(self, file_size, files, threads, samples):
        """
        Get the storage capacity, calculate the usage of the storage and
        set the workload CR file parameters.

        Args:
            file_size (int) : the size of the file to be used
            files (int) : number of files to use
            threads (int) : number of threads to use in the test
            samples (int) : how many samples to run for each test

        """
        self.crd_data["spec"]["workload"]["args"]["file_size"] = file_size
        self.crd_data["spec"]["workload"]["args"]["files"] = files
        self.crd_data["spec"]["workload"]["args"]["threads"] = threads
        self.crd_data["spec"]["workload"]["args"]["samples"] = samples

        # Calculating the size of the volume that needs to be tested. It is
        # set to three times the total size of the files (to ensure ample
        # headroom), and to at least 100Gi.
        # Since the file_size is in KB and the vol_size needs to be in GB,
        # a unit conversion is needed.
        vol_size = int(files * threads * file_size * 3)
        vol_size = int(vol_size / constants.GB2KB)
        if vol_size < 100:
            vol_size = 100
        self.crd_data["spec"]["workload"]["args"][
            "storagesize"] = f"{vol_size}Gi"

    def init_full_results(self, full_results):
        """
        Initialize the full results object which will be sent to the ES server

        Args:
            full_results (obj): an empty SmallFileResultsAnalyse object

        Returns:
            SmallFileResultsAnalyse (obj): the input object filled with data

        """
        for key in self.environment:
            full_results.add_key(key, self.environment[key])

        # Calculating the total size of the working data set - in GB
        full_results.add_key(
            "dataset",
            self.crd_data["spec"]["workload"]["args"]["file_size"] *
            self.crd_data["spec"]["workload"]["args"]["files"] *
            self.crd_data["spec"]["workload"]["args"]["threads"] *
            full_results.results["clients"] / constants.GB2KB,
        )

        full_results.add_key(
            "global_options",
            {
                "files": self.crd_data["spec"]["workload"]["args"]["files"],
                "file_size": self.crd_data["spec"]["workload"]["args"]["file_size"],
                "storageclass": self.crd_data["spec"]["workload"]["args"]["storageclass"],
                "vol_size": self.crd_data["spec"]["workload"]["args"]["storagesize"],
            },
        )
        return full_results

    def run(self):
        log.info("Running SmallFile bench")
        self.deploy_and_wait_for_wl_to_start(timeout=240, sleep=10)

        # Getting the UUID from inside the benchmark pod
        self.uuid = self.operator.get_uuid(self.client_pod)
        self.wait_for_wl_to_finish(sleep=30)
        if "RUN STATUS DONE" in self.test_logs:
            log.info("SmallFiles has completed successfully")
            return True
        log.warning("SmallFiles failed to complete")
        return False

    def teardown(self):
        """
        Tear down the test environment at the end of the test.

        """
        log.info("cleanup the environment")
        if isinstance(self.es, ElasticSearch):
            self.es.cleanup()
        self.operator.cleanup()
        # Wait up to 45 minutes for the ceph cluster to reach health OK after
        # the backend operations have completed.
        log.info("Verify (and wait if needed) that ceph health is OK")
        ceph_health_check(tries=45, delay=60)

    @pytest.mark.parametrize(
        argnames=["file_size", "files", "threads", "samples", "interface"],
        argvalues=[
            pytest.param(
                *[4, 50000, 4, 3, constants.CEPHBLOCKPOOL],
                marks=pytest.mark.polarion_id("OCS-1295"),
            ),
            pytest.param(
                *[16, 50000, 4, 3, constants.CEPHBLOCKPOOL],
                marks=pytest.mark.polarion_id("OCS-2020"),
            ),
            pytest.param(
                *[16, 200000, 4, 3, constants.CEPHBLOCKPOOL],
                marks=pytest.mark.polarion_id("OCS-2021"),
            ),
            pytest.param(
                *[4, 50000, 4, 3, constants.CEPHFILESYSTEM],
                marks=pytest.mark.polarion_id("OCS-2022"),
            ),
            pytest.param(
                *[16, 50000, 4, 3, constants.CEPHFILESYSTEM],
                marks=pytest.mark.polarion_id("OCS-2023"),
            ),
        ],
    )
    @pytest.mark.polarion_id("OCS-1295")
    def test_smallfile_workload(self, file_size, files, threads, samples,
                                interface):
        """
        Run SmallFile Workload

        Args:
            file_size (int) : the size of the file to be used
            files (int) : number of files to use
            threads (int) : number of threads to use in the test
            samples (int) : how many samples to run for each test
            interface (str) : the volume type (rbd / cephfs)

        """
        # verify that there is an elasticsearch server for the benchmark
        if not self.es:
            log.error("This test must have an Elasticsearch server")
            return False

        # Getting the full path for the test logs
        self.full_log_path = get_full_test_logs_path(cname=self)
        self.full_log_path += f"-{file_size}-{files}-{threads}-{samples}-{interface}"
        log.info(f"Logs file path name is : {self.full_log_path}")

        # Loading the main template yaml file for the benchmark
        log.info("Create resource file for smallfiles workload")
        self.crd_data = templating.load_yaml(
            constants.SMALLFILE_BENCHMARK_YAML)

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.es_info_backup(self.es)

        self.set_storageclass(interface=interface)

        # Setting the workload parameters and calculating the required volume size
        self.setting_storage_usage(file_size, files, threads, samples)

        self.get_env_info()

        if not self.run():
            log.error("The benchmark failed to run !")
            return

        # Setting back the original elastic-search information
        if self.backup_es:
            self.crd_data["spec"]["elasticsearch"] = self.backup_es

        # Initialize the results doc file.
        full_results = self.init_full_results(
            SmallFileResultsAnalyse(self.uuid, self.crd_data,
                                    self.full_log_path, self.main_es))

        log.info(f"Full results is : {full_results.results}")
        if isinstance(self.es, ElasticSearch):
            # Using internal deployed elasticsearch
            log.info("Getting data from internal ES")
            if self.main_es:
                self.copy_es_data(self.es)
                full_results.read()
            else:
                log.info("Dumping data from the Internal ES to tar ball file")
                self.es.dumping_all_data(self.full_log_path)
        else:
            log.info(self.es)
            self.es = Elasticsearch(
                hosts=[{"host": self.es["server"], "port": self.es["port"]}]
            )
            full_results.read()

        full_results.add_key("test_time", {
            "start": self.start_time,
            "end": self.end_time
        })

        if self.main_es:
            full_results.es = self.main_es

        if not full_results.dont_check:
            full_results.add_key("hosts", full_results.get_clients_list())
            full_results.init_full_results()
            full_results.aggregate_host_results()
            test_status = full_results.aggregate_samples_results()
            full_results.all_results = None
            if full_results.es_write():
                log.info(
                    f"The Result can be found at : {full_results.results_link()}"
                )
        else:
            test_status = True

        assert test_status, "Test Failed !"
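
As a quick sanity check of the volume sizing used by this version's
parameters, here is a standalone sketch (assuming constants.GB2KB is
1024 * 1024, the KB-per-GB conversion; the helper name is ours, not the
project's):

GB2KB = 1024 * 1024  # assumed value of constants.GB2KB

def needed_storagesize(file_size, files, threads):
    # Mirrors setting_storage_usage(): 3x the data set, 100Gi minimum
    vol_size = int(files * threads * file_size * 3 / GB2KB)
    return f"{max(vol_size, 100)}Gi"

# The parametrized cases above all stay under the floor:
print(needed_storagesize(4, 50000, 4))    # 2,400,000 KB  ~=  2.3 GB -> 100Gi
print(needed_storagesize(16, 50000, 4))   # 9,600,000 KB  ~=  9.2 GB -> 100Gi
print(needed_storagesize(16, 200000, 4))  # 38,400,000 KB ~= 36.6 GB -> 100Gi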