def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=0, default_iterations=1,
               default_warmup_time_period=None, default_time_period=None, completed_by_name=None):
    op_spec = task_spec["operation"]
    if isinstance(op_spec, str) and op_spec in ops:
        op = ops[op_spec]
    else:
        # may be an inline operation
        op = self.parse_operation(op_spec, error_ctx="inline operation in challenge %s" % challenge_name)

    schedule = self._r(task_spec, "schedule", error_ctx=op.name, mandatory=False, default_value="deterministic")

    task = track.Task(name=self._r(task_spec, "name", error_ctx=op.name, mandatory=False, default_value=op.name),
                      operation=op,
                      meta_data=self._r(task_spec, "meta", error_ctx=op.name, mandatory=False),
                      warmup_iterations=self._r(task_spec, "warmup-iterations", error_ctx=op.name, mandatory=False,
                                                default_value=default_warmup_iterations),
                      iterations=self._r(task_spec, "iterations", error_ctx=op.name, mandatory=False,
                                         default_value=default_iterations),
                      warmup_time_period=self._r(task_spec, "warmup-time-period", error_ctx=op.name, mandatory=False,
                                                 default_value=default_warmup_time_period),
                      time_period=self._r(task_spec, "time-period", error_ctx=op.name, mandatory=False,
                                          default_value=default_time_period),
                      clients=self._r(task_spec, "clients", error_ctx=op.name, mandatory=False, default_value=1),
                      # this will work because the operation name is always set, i.e. op.name is never `None`.
                      completes_parent=(op.name == completed_by_name),
                      schedule=schedule,
                      params=task_spec)
    if task.warmup_iterations != default_warmup_iterations and task.time_period is not None:
        self._error("Operation '%s' in challenge '%s' defines '%d' warmup iterations and a time period of '%d' seconds. "
                    "Please do not mix time periods and iterations." %
                    (op.name, challenge_name, task.warmup_iterations, task.time_period))
    elif task.warmup_time_period is not None and task.iterations != default_iterations:
        self._error("Operation '%s' in challenge '%s' defines a warmup time period of '%d' seconds and '%d' iterations. "
                    "Please do not mix time periods and iterations." %
                    (op.name, challenge_name, task.warmup_time_period, task.iterations))
    return task
def test_scheduler_adapts_to_changed_weights(self):
    task = track.Task(name="bulk-index",
                      operation=track.Operation(name="bulk-index",
                                                operation_type=track.OperationType.Bulk.to_hyphenated_string()),
                      clients=4,
                      params={"target-throughput": "5000 docs/s"})
    s = scheduler.UnitAwareScheduler(task=task, scheduler_class=scheduler.DeterministicScheduler)
    # first request is unthrottled
    # suppress pylint false positive
    # pylint: disable=not-callable
    assert s.next(0) == 0
    # we'll start with bulks of 1,000 docs, which corresponds to 5 requests per second for all clients
    s.after_request(now=None, weight=1000, unit="docs", request_meta_data=None)
    # suppress pylint false positive
    # pylint: disable=not-callable
    assert s.next(0) == 1 / 5 * task.clients
    # bulk size changes to 10,000 docs, which means one request every two seconds for all clients
    s.after_request(now=None, weight=10000, unit="docs", request_meta_data=None)
    # suppress pylint false positive
    # pylint: disable=not-callable
    assert s.next(0) == 2 * task.clients
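# A minimal sketch (not from the source) of the arithmetic the test above encodes.
# Assumption: UnitAwareScheduler spreads the target throughput across all clients
# and rescales the wait time by the observed request weight. `expected_wait_time`
# is a hypothetical helper introduced only for illustration.
def expected_wait_time(target_throughput, weight, clients):
    # e.g. 5000 docs/s at 1,000 docs per bulk -> 5 requests/s -> 0.2 s per request,
    # multiplied by the number of clients because each client throttles itself
    return clients * weight / target_throughput

assert expected_wait_time(target_throughput=5000, weight=1000, clients=4) == 1 / 5 * 4
assert expected_wait_time(target_throughput=5000, weight=10000, clients=4) == 2 * 4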
def test_scheduler_accepts_differing_units_pages_and_ops(self):
    task = track.Task(name="scroll-query",
                      operation=track.Operation(name="scroll-query",
                                                operation_type=track.OperationType.Search.to_hyphenated_string()),
                      clients=1,
                      params={
                          # implicitly: ops/s
                          "target-throughput": 10
                      })
    s = scheduler.UnitAwareScheduler(task=task, scheduler_class=scheduler.DeterministicScheduler)
    # first request is unthrottled
    # suppress pylint false positive
    # pylint: disable=not-callable
    assert s.next(0) == 0
    # no exception despite differing units ...
    s.after_request(now=None, weight=20, unit="pages", request_meta_data=None)
    # ... and it is still throttled in ops/s
    # suppress pylint false positive
    # pylint: disable=not-callable
    assert s.next(0) == 0.1 * task.clients
def test_scheduler_does_not_change_throughput_for_empty_requests(self):
    task = track.Task(name="match-all-query",
                      operation=track.Operation(name="query",
                                                operation_type=track.OperationType.Search.to_hyphenated_string()),
                      clients=1,
                      params={
                          # implicitly: ops/s
                          "target-throughput": 10
                      })
    s = scheduler.UnitAwareScheduler(task=task, scheduler_class=scheduler.DeterministicScheduler)
    # first request is unthrottled...
    s.before_request(now=0)
    # suppress pylint false positive
    # pylint: disable=not-callable
    assert s.next(0) == 0
    # ... but it also produced an error (zero ops)
    s.after_request(now=1, weight=0, unit="ops", request_meta_data=None)
    # next request is still unthrottled
    s.before_request(now=1)
    # suppress pylint false positive
    # pylint: disable=not-callable
    assert s.next(0) == 0
    s.after_request(now=2, weight=1, unit="ops", request_meta_data=None)
    # now we throttle
    s.before_request(now=2)
    # suppress pylint false positive
    # pylint: disable=not-callable
    assert s.next(0) == 0.1 * task.clients
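# Hedged sketch of the guard the test above exercises; this is an assumption about
# the behavior, not the library's code. A response with weight 0 must not feed the
# throughput calculation: inverting a zero rate would divide by zero (or stall the
# schedule), so the scheduler leaves such requests unthrottled and keeps the
# previously observed rate. `wait_time` is a hypothetical helper for illustration.
def wait_time(target_throughput, weight, clients):
    if weight <= 0:
        return 0  # empty request: stay unthrottled rather than dividing by zero
    return clients * weight / target_throughput

assert wait_time(target_throughput=10, weight=0, clients=1) == 0
assert wait_time(target_throughput=10, weight=1, clients=1) == 0.1 * 1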
def test_scheduler_adapts_to_changed_weights(self):
    task = track.Task(name="bulk-index",
                      operation=track.Operation(name="bulk-index", operation_type=track.OperationType.Bulk.name),
                      clients=4,
                      params={"target-throughput": "5000 docs/s"})
    s = scheduler.UnitAwareScheduler(task=task, scheduler_class=scheduler.DeterministicScheduler)
    # first request is unthrottled
    self.assertEqual(0, s.next(0))
    # we'll start with bulks of 1,000 docs, which corresponds to 5 requests per second for all clients
    s.after_request(now=None, weight=1000, unit="docs", request_meta_data=None)
    self.assertEqual(1 / 5 * task.clients, s.next(0))
    # bulk size changes to 10,000 docs, which means one request every two seconds for all clients
    s.after_request(now=None, weight=10000, unit="docs", request_meta_data=None)
    self.assertEqual(2 * task.clients, s.next(0))
def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=0, default_iterations=1,
               default_warmup_time_period=None, default_time_period=None):
    op_name = task_spec["operation"]
    if op_name not in ops:
        self._error("'schedule' for challenge '%s' contains a non-existing operation '%s'. "
                    "Please add an operation '%s' to the 'operations' block." % (challenge_name, op_name, op_name))

    target_interval = self._r(task_spec, "target-interval", error_ctx=op_name, mandatory=False)
    target_throughput = self._r(task_spec, "target-throughput", error_ctx=op_name, mandatory=False)
    if target_interval is not None and target_throughput is not None:
        self._error("Operation '%s' in challenge '%s' specifies target-interval and target-throughput but only one "
                    "of them is allowed." % (op_name, challenge_name))
    if target_interval:
        # e.g. a target-interval of 2 (seconds between requests) is equivalent to a target-throughput of 0.5 ops/s
        target_throughput = 1 / target_interval

    task = track.Task(operation=ops[op_name],
                      meta_data=self._r(task_spec, "meta", error_ctx=op_name, mandatory=False),
                      warmup_iterations=self._r(task_spec, "warmup-iterations", error_ctx=op_name, mandatory=False,
                                                default_value=default_warmup_iterations),
                      iterations=self._r(task_spec, "iterations", error_ctx=op_name, mandatory=False,
                                         default_value=default_iterations),
                      warmup_time_period=self._r(task_spec, "warmup-time-period", error_ctx=op_name, mandatory=False,
                                                 default_value=default_warmup_time_period),
                      time_period=self._r(task_spec, "time-period", error_ctx=op_name, mandatory=False,
                                          default_value=default_time_period),
                      clients=self._r(task_spec, "clients", error_ctx=op_name, mandatory=False, default_value=1),
                      target_throughput=target_throughput)
    if task.warmup_iterations != default_warmup_iterations and task.time_period is not None:
        self._error("Operation '%s' in challenge '%s' defines '%d' warmup iterations and a time period of '%d' seconds. "
                    "Please do not mix time periods and iterations." %
                    (op_name, challenge_name, task.warmup_iterations, task.time_period))
    elif task.warmup_time_period is not None and task.iterations != default_iterations:
        self._error("Operation '%s' in challenge '%s' defines a warmup time period of '%d' seconds and '%d' iterations. "
                    "Please do not mix time periods and iterations." %
                    (op_name, challenge_name, task.warmup_time_period, task.iterations))
    return task
def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=0, default_iterations=1,
               default_warmup_time_period=None, default_time_period=None, completed_by_name=None):
    op_name = task_spec["operation"]
    if op_name not in ops:
        self._error("'schedule' for challenge '%s' contains a non-existing operation '%s'. "
                    "Please add an operation '%s' to the 'operations' block." % (challenge_name, op_name, op_name))

    schedule = self._r(task_spec, "schedule", error_ctx=op_name, mandatory=False, default_value="deterministic")

    task = track.Task(operation=ops[op_name],
                      meta_data=self._r(task_spec, "meta", error_ctx=op_name, mandatory=False),
                      warmup_iterations=self._r(task_spec, "warmup-iterations", error_ctx=op_name, mandatory=False,
                                                default_value=default_warmup_iterations),
                      iterations=self._r(task_spec, "iterations", error_ctx=op_name, mandatory=False,
                                         default_value=default_iterations),
                      warmup_time_period=self._r(task_spec, "warmup-time-period", error_ctx=op_name, mandatory=False,
                                                 default_value=default_warmup_time_period),
                      time_period=self._r(task_spec, "time-period", error_ctx=op_name, mandatory=False,
                                          default_value=default_time_period),
                      clients=self._r(task_spec, "clients", error_ctx=op_name, mandatory=False, default_value=1),
                      # this will work because op_name must always be set, i.e. it is never `None`.
                      completes_parent=(op_name == completed_by_name),
                      schedule=schedule,
                      params=task_spec)
    if task.warmup_iterations != default_warmup_iterations and task.time_period is not None:
        self._error("Operation '%s' in challenge '%s' defines '%d' warmup iterations and a time period of '%d' seconds. "
                    "Please do not mix time periods and iterations." %
                    (op_name, challenge_name, task.warmup_iterations, task.time_period))
    elif task.warmup_time_period is not None and task.iterations != default_iterations:
        self._error("Operation '%s' in challenge '%s' defines a warmup time period of '%d' seconds and '%d' iterations. "
                    "Please do not mix time periods and iterations." %
                    (op_name, challenge_name, task.warmup_time_period, task.iterations))
    return task
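# Hypothetical task_spec (not from the source) illustrating the validation in
# parse_task above: a warmup-time-period combined with a non-default iterations
# value triggers _error(...), since time periods and iterations must not be mixed.
# Assumes "bulk-index" exists in the challenge's 'operations' block.
rejected_task_spec = {
    "operation": "bulk-index",
    "warmup-time-period": 120,
    "iterations": 1000
}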
def create_index_task(self):
    return track.Task("create-index-task",
                      track.Operation("create-index-op",
                                      operation_type=track.OperationType.CreateIndex.to_hyphenated_string()),
                      tags=["write-op", "admin-op"])
def task(self, schedule=None, target_throughput=None, target_interval=None):
    op = track.Operation("bulk-index", track.OperationType.Bulk.to_hyphenated_string())
    params = {}
    if target_throughput is not None:
        params["target-throughput"] = target_throughput
    if target_interval is not None:
        params["target-interval"] = target_interval
    return track.Task("test", op, schedule=schedule, params=params)
def task(self, schedule=None, target_throughput=None, target_interval=None):
    op = track.Operation("bulk-index", track.OperationType.Bulk.name)
    params = {}
    if target_throughput:
        params["target-throughput"] = target_throughput
    if target_interval:
        params["target-interval"] = target_interval
    return track.Task("test", op, schedule=schedule, params=params)
def test_legacy_scheduler(self):
    task = track.Task(name="raw-request",
                      operation=track.Operation(name="raw",
                                                operation_type=track.OperationType.RawRequest.to_hyphenated_string()),
                      clients=1,
                      schedule="simple")
    s = scheduler.scheduler_for(task)
    self.assertEqual(0, s.next(0))
    self.assertEqual(0, s.next(0))
def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=0, default_iterations=1):
    op_name = task_spec["operation"]
    if op_name not in ops:
        self._error("'schedule' for challenge '%s' contains a non-existing operation '%s'. "
                    "Please add an operation '%s' to the 'operations' block." % (challenge_name, op_name, op_name))
    task = track.Task(operation=ops[op_name],
                      warmup_iterations=self._r(task_spec, "warmup-iterations", error_ctx=op_name, mandatory=False,
                                                default_value=default_warmup_iterations),
                      iterations=self._r(task_spec, "iterations", error_ctx=op_name, mandatory=False,
                                         default_value=default_iterations),
                      warmup_time_period=self._r(task_spec, "warmup-time-period", error_ctx=op_name, mandatory=False),
                      time_period=self._r(task_spec, "time-period", error_ctx=op_name, mandatory=False),
                      clients=self._r(task_spec, "clients", error_ctx=op_name, mandatory=False, default_value=1),
                      target_throughput=self._r(task_spec, "target-throughput", error_ctx=op_name, mandatory=False))
    if task.warmup_iterations != default_warmup_iterations and task.time_period is not None:
        self._error("Operation '%s' in challenge '%s' mixes warmup iterations with time periods. Please do not mix "
                    "time periods and iterations." % (op_name, challenge_name))
    elif task.warmup_time_period is not None and task.iterations != default_iterations:
        self._error("Operation '%s' in challenge '%s' mixes warmup time period with iterations. Please do not mix "
                    "time periods and iterations." % (op_name, challenge_name))
    return task
def task(self, schedule=None, target_throughput=None, target_interval=None, ignore_response_error_level=None):
    op = track.Operation("bulk-index", track.OperationType.Bulk.to_hyphenated_string())
    params = {}
    if target_throughput is not None:
        params["target-throughput"] = target_throughput
    if target_interval is not None:
        params["target-interval"] = target_interval
    if ignore_response_error_level is not None:
        params["ignore-response-error-level"] = ignore_response_error_level
    return track.Task("test", op, schedule=schedule, params=params)
def test_scheduler_rejects_differing_throughput_units(self):
    task = track.Task(name="bulk-index",
                      operation=track.Operation(name="bulk-index",
                                                operation_type=track.OperationType.Bulk.to_hyphenated_string()),
                      clients=4,
                      params={"target-throughput": "5000 MB/s"})
    s = scheduler.UnitAwareScheduler(task=task, scheduler_class=scheduler.DeterministicScheduler)
    with self.assertRaises(exceptions.RallyAssertionError) as ex:
        s.after_request(now=None, weight=1000, unit="docs", request_meta_data=None)
    self.assertEqual("Target throughput for [bulk-index] is specified in [MB/s] but the task throughput is "
                     "measured in [docs/s].",
                     ex.exception.args[0])
def test_sets_absolute_path(self):
    from esrally import config
    from esrally.track import track

    cfg = config.Config()
    cfg.add(config.Scope.application, "benchmarks", "local.dataset.cache", "/data")

    default_challenge = track.Challenge("default", description="default challenge", default=True, schedule=[
        track.Task(operation=track.Operation("index", operation_type=track.OperationType.Index), clients=4)
    ])
    another_challenge = track.Challenge("other", description="non-default challenge", default=False)
    t = track.Track(name="unittest", short_description="unittest track",
                    challenges=[another_challenge, default_challenge],
                    indices=[
                        track.Index(name="test", auto_managed=True, types=[
                            track.Type("docs",
                                       mapping={},
                                       document_file="docs/documents.json",
                                       document_archive="docs/documents.json.bz2")
                        ])
                    ])

    loader.set_absolute_data_path(cfg, t)

    self.assertEqual("/data/docs/documents.json", t.indices[0].types[0].document_file)
    self.assertEqual("/data/docs/documents.json.bz2", t.indices[0].types[0].document_archive)
def test_scheduler_accepts_differing_units_pages_and_ops(self):
    task = track.Task(name="scroll-query",
                      operation=track.Operation(name="scroll-query",
                                                operation_type=track.OperationType.Search.name),
                      clients=1,
                      params={
                          # implicitly: ops/s
                          "target-throughput": 10
                      })
    s = scheduler.UnitAwareScheduler(task=task, scheduler_class=scheduler.DeterministicScheduler)
    # first request is unthrottled
    self.assertEqual(0, s.next(0))
    # no exception despite differing units ...
    s.after_request(now=None, weight=20, unit="pages", request_meta_data=None)
    # ... and it is still throttled in ops/s
    self.assertEqual(0.1 * task.clients, s.next(0))
def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=0, default_iterations=1):
    op_name = task_spec["operation"]
    if op_name not in ops:
        self._error("'schedule' for challenge '%s' contains a non-existing operation '%s'. "
                    "Please add an operation '%s' to the 'operations' block." % (challenge_name, op_name, op_name))
    return track.Task(operation=ops[op_name],
                      warmup_iterations=self._r(task_spec, "warmup-iterations", error_ctx=op_name, mandatory=False,
                                                default_value=default_warmup_iterations),
                      warmup_time_period=self._r(task_spec, "warmup-time-period", error_ctx=op_name, mandatory=False),
                      iterations=self._r(task_spec, "iterations", error_ctx=op_name, mandatory=False,
                                         default_value=default_iterations),
                      clients=self._r(task_spec, "clients", error_ctx=op_name, mandatory=False, default_value=1),
                      target_throughput=self._r(task_spec, "target-throughput", error_ctx=op_name, mandatory=False))
def test_run_benchmark(self):
    cfg = config.Config()

    cfg.add(config.Scope.application, "system", "env.name", "unittest")
    cfg.add(config.Scope.application, "system", "time.start",
            datetime(year=2017, month=8, day=20, hour=1, minute=0, second=0))
    cfg.add(config.Scope.application, "system", "race.id", "6ebc6e53-ee20-4b0c-99b4-09697987e9f4")
    cfg.add(config.Scope.application, "system", "offline.mode", False)
    cfg.add(config.Scope.application, "driver", "on.error", "abort")
    cfg.add(config.Scope.application, "driver", "profiling", False)
    cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
    cfg.add(config.Scope.application, "track", "params", {})
    cfg.add(config.Scope.application, "track", "test.mode.enabled", True)
    cfg.add(config.Scope.application, "telemetry", "devices", [])
    cfg.add(config.Scope.application, "telemetry", "params", {})
    cfg.add(config.Scope.application, "mechanic", "car.names", ["external"])
    cfg.add(config.Scope.application, "mechanic", "skip.rest.api.check", True)
    cfg.add(config.Scope.application, "client", "hosts",
            AsyncDriverTests.Holder(all_hosts={"default": ["localhost:9200"]}))
    cfg.add(config.Scope.application, "client", "options",
            AsyncDriverTests.Holder(all_client_options={"default": {}}))

    params.register_param_source_for_name("bulk-param-source", AsyncDriverTestParamSource)

    task = track.Task(name="bulk-index",
                      operation=track.Operation("bulk-index",
                                                track.OperationType.Bulk.name,
                                                params={
                                                    "body": ["action_metadata_line", "index_line"],
                                                    "action-metadata-present": True,
                                                    "bulk-size": 1,
                                                    # we need this because the parameter source does not know that
                                                    # we only have one bulk and hence size() returns incorrect results
                                                    "size": 1
                                                },
                                                param_source="bulk-param-source"),
                      warmup_iterations=0,
                      iterations=1,
                      clients=1)

    current_challenge = track.Challenge(name="default", default=True, schedule=[task])
    current_track = track.Track(name="unit-test", challenges=[current_challenge])

    driver = async_driver.AsyncDriver(cfg, current_track, current_challenge,
                                      es_client_factory_class=StaticClientFactory)

    distribution_flavor, distribution_version, revision = driver.setup()
    self.assertEqual("oss", distribution_flavor)
    self.assertEqual("7.3.0", distribution_version)
    self.assertEqual("de777fa", revision)

    metrics_store_representation = driver.run()

    metric_store = metrics.metrics_store(cfg, read_only=True, track=current_track, challenge=current_challenge)
    metric_store.bulk_add(metrics_store_representation)

    self.assertIsNotNone(metric_store.get_one(name="latency", task="bulk-index",
                                              sample_type=metrics.SampleType.Normal))
    self.assertIsNotNone(metric_store.get_one(name="service_time", task="bulk-index",
                                              sample_type=metrics.SampleType.Normal))
    self.assertIsNotNone(metric_store.get_one(name="processing_time", task="bulk-index",
                                              sample_type=metrics.SampleType.Normal))
    self.assertIsNotNone(metric_store.get_one(name="throughput", task="bulk-index",
                                              sample_type=metrics.SampleType.Normal))
    self.assertIsNotNone(metric_store.get_one(name="node_total_young_gen_gc_time",
                                              sample_type=metrics.SampleType.Normal))
    self.assertIsNotNone(metric_store.get_one(name="node_total_old_gen_gc_time",
                                              sample_type=metrics.SampleType.Normal))
    # ensure that there are not more documents than we expect
    self.assertEqual(6, len(metric_store.docs), msg=json.dumps(metric_store.docs, indent=2))
def search_task(self):
    return track.Task("search-task",
                      track.Operation("search-op",
                                      operation_type=track.OperationType.Search.to_hyphenated_string()))
def post_process_for_index_auto_management(t, expected_cluster_health):
    auto_managed_indices = any([index.auto_managed for index in t.indices])
    # spare users this warning for our default tracks
    if auto_managed_indices and t.name not in DEFAULT_TRACKS:
        console.warn("Track [%s] uses index auto-management which will be removed soon. Please add [delete-index] "
                     "and [create-index] tasks at the beginning of each relevant challenge and turn off index "
                     "auto-management for each index. For details please see the migration guide in the docs." % t.name)
    if auto_managed_indices or len(t.templates) > 0:
        for challenge in t.challenges:
            tasks = []
            # TODO: Remove the index settings element. We can do this much better now with the create-index operation.
            create_index_params = {"include-in-reporting": False}
            if challenge.index_settings:
                if t.name not in DEFAULT_TRACKS:
                    console.warn("Track [%s] defines the deprecated property 'index-settings'. Please create indices "
                                 "explicitly with [create-index] and the respective index settings there instead." % t.name)
                create_index_params["settings"] = challenge.index_settings
            if len(t.templates) > 0:
                # check if the user has defined a create index template operation
                user_creates_templates = any(
                    task.matches(track.TaskOpTypeFilter(track.OperationType.CreateIndexTemplate.name))
                    for task in challenge.schedule)
                # We attempt to still do this automatically but issue a warning so that the user will create it themselves.
                if not user_creates_templates:
                    console.warn("Track [%s] defines %d index template(s) but soon Rally will not create them "
                                 "implicitly anymore. Please add [delete-index-template] and [create-index-template] "
                                 "tasks at the beginning of the challenge %s."
                                 % (t.name, len(t.templates), challenge.name), logger=logger)
                    tasks.append(track.Task(name="auto-delete-index-templates",
                                            operation=track.Operation(
                                                name="auto-delete-index-templates",
                                                operation_type=track.OperationType.DeleteIndexTemplate.name,
                                                params={
                                                    "include-in-reporting": False,
                                                    "only-if-exists": True
                                                })))
                    tasks.append(track.Task(name="auto-create-index-templates",
                                            operation=track.Operation(
                                                name="auto-create-index-templates",
                                                operation_type=track.OperationType.CreateIndexTemplate.name,
                                                params=create_index_params.copy())))
            if auto_managed_indices:
                tasks.append(track.Task(name="auto-delete-indices",
                                        operation=track.Operation(
                                            name="auto-delete-indices",
                                            operation_type=track.OperationType.DeleteIndex.name,
                                            params={
                                                "include-in-reporting": False,
                                                "only-if-exists": True
                                            })))
                tasks.append(track.Task(name="auto-create-indices",
                                        operation=track.Operation(
                                            name="auto-create-indices",
                                            operation_type=track.OperationType.CreateIndex.name,
                                            params=create_index_params.copy())))
            # check if the user has already defined a cluster-health operation
            user_checks_cluster_health = any(
                task.matches(track.TaskOpTypeFilter(track.OperationType.ClusterHealth.name))
                for task in challenge.schedule)
            if expected_cluster_health != "skip" and not user_checks_cluster_health:
                tasks.append(track.Task(name="auto-check-cluster-health",
                                        operation=track.Operation(
                                            name="auto-check-cluster-health",
                                            operation_type=track.OperationType.ClusterHealth.name,
                                            params={
                                                "include-in-reporting": False,
                                                "request-params": {
                                                    "wait_for_status": expected_cluster_health
                                                }
                                            })))
            challenge.prepend_tasks(tasks)
        return t
    else:
        return t
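# Illustrative summary derived from the function above (not separate source code):
# for a track with auto-managed indices, at least one index template, and
# expected_cluster_health != "skip", each challenge would be prepended with tasks
# in this order:
#   auto-delete-index-templates, auto-create-index-templates,
#   auto-delete-indices, auto-create-indices, auto-check-cluster-health
# The template and cluster-health tasks are omitted when the user's schedule
# already contains a matching operation type.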