def dump_documents(client, index, out_path, total_docs, progress_message_suffix=""):
    """Write up to ``total_docs`` documents of ``index`` to disk, plain and compressed.

    The ``_source`` of every scanned document is serialized as compact JSON
    followed by a newline. Those bytes are written to ``out_path`` and, after
    passing through ``DOCS_COMPRESSOR``, to ``out_path + COMP_EXT``.

    :param client: Client object handed to ``elasticsearch.helpers.scan``.
    :param index: Name of the index to scan.
    :param out_path: Path of the uncompressed output file.
    :param total_docs: Maximum number of documents to write.
    :param progress_message_suffix: Passed through unchanged to ``render_progress``.
    """
    # pylint: disable=import-outside-toplevel
    from elasticsearch import helpers

    logger = logging.getLogger(__name__)
    # Forwarded to render_progress (presumably the reporting interval - confirm
    # against render_progress); clamped to 1 so a small total never yields 0.
    freq = max(1, total_docs // 1000)

    reporter = console.progress()
    compressor = DOCS_COMPRESSOR()
    compressed_path = out_path + COMP_EXT

    with open(out_path, "wb") as plain_file, open(compressed_path, "wb") as compressed_file:
        logger.info("Dumping corpus for index [%s] to [%s].", index, out_path)
        match_all = {"query": {"match_all": {}}}
        hits = helpers.scan(client, query=match_all, index=index)
        # position is 1-based: it is the count of documents including this one.
        for position, hit in enumerate(hits, start=1):
            if position > total_docs:
                break
            line = json.dumps(hit["_source"], separators=(",", ":")) + "\n"
            payload = line.encode("utf-8")

            plain_file.write(payload)
            compressed_file.write(compressor.compress(payload))

            render_progress(reporter, progress_message_suffix, index, position, total_docs, freq)

        # Emit whatever the compressor still buffers before the file is closed.
        compressed_file.write(compressor.flush())
    reporter.finish()
def __init__(self, config, track, challenge, es_client_factory_class=client.EsClientFactory):
    """Store the given collaborators and read the driver settings from ``config``.

    :param config: Configuration object, queried through ``config.opts(section, key, ...)``.
    :param track: Stored as ``self.track``.
    :param challenge: Stored as ``self.challenge``.
    :param es_client_factory_class: Stored as ``self.es_client_factory``;
        defaults to ``client.EsClientFactory``.
    """
    self.logger = logging.getLogger(__name__)

    # Collaborators supplied by the caller.
    self.config = config
    self.track = track
    self.challenge = challenge
    self.es_client_factory = es_client_factory_class

    # Helpers created eagerly.
    self.progress_reporter = console.progress()
    self.throughput_calculator = driver.ThroughputCalculator()

    # Attributes that start out unset.
    self.metrics_store = None
    self.telemetry = None
    self.es_clients = None
    self.sampler = None

    # Bookkeeping containers, initially empty.
    self.raw_samples = []
    self.most_recent_sample_per_client = {}
    self.current_tasks = []

    # Settings read from the configuration.
    read_opt = self.config.opts
    self.quiet = read_opt("system", "quiet.mode", mandatory=False, default_value=False)
    # TODO: Change the default value to `False` once this implementation becomes the default
    self.debug_event_loop = read_opt("system", "async.debug", mandatory=False, default_value=True)
    self.abort_on_error = read_opt("driver", "on.error") == "abort"
    self.profiling_enabled = read_opt("driver", "profiling")
def __init__(self, msg, accuracy=0):
    """Create the progress reporter and the percentage format string.

    :param msg: Stored as ``self.msg``.
    :param accuracy: Number of decimal places in the formatted percentage (default 0).
    """
    self.p = console.progress()
    self.msg = msg

    # Field width of the printed number. Without decimals the widest value is
    # "100", i.e. three characters. With decimals we need those three, one
    # more for the decimal point, plus one per requested decimal place.
    if accuracy == 0:
        total_width = 3
    else:
        total_width = 4 + accuracy

    # Resulting template for an accuracy of 1: [%5.1f%%]
    number_spec = "%d.%d" % (total_width, accuracy)
    self.percent_format = "[%" + number_spec + "f%%]"
def __init__(self, msg, accuracy=0):
    """Create the progress reporter and the percentage format string.

    :param msg: Stored as ``self.msg``.
    :param accuracy: Number of decimal places in the formatted percentage (default 0).
    """
    from esrally.utils import console

    self.p = console.progress()
    self.msg = msg

    # Field width of the printed number. Without decimals the widest value is
    # "100", i.e. three characters. With decimals we need those three, one
    # more for the decimal point, plus one per requested decimal place.
    if accuracy == 0:
        total_width = 3
    else:
        total_width = 4 + accuracy

    # Resulting template for an accuracy of 1: [%5.1f%%]
    number_spec = "%d.%d" % (total_width, accuracy)
    self.percent_format = "[%" + number_spec + "f%%]"
def __init__(self):
    """Run the parent initialiser, then set every attribute to its empty or unset default."""
    super().__init__()

    # References that start out unset.
    self.config = None
    # Elasticsearch client
    self.es = None
    self.metrics_store = None
    self.start_sender = None
    self.allocations = None
    self.join_points = None
    self.ops_per_join_point = None

    # Containers, initially empty.
    self.raw_samples = []
    self.drivers = []
    self.clients_completed_current_step = {}
    self.most_recent_sample_per_client = {}

    # Step bookkeeping; current_step starts at -1.
    self.current_step = -1
    self.number_of_steps = 0
    self.currently_completed = 0

    # Progress reporting.
    self.progress_reporter = console.progress()
    self.progress_counter = 0
    self.quiet = False