def start(self):
    """Create a client for the started nodes, wire up cluster-level telemetry and wait for the REST API.

    :raises exceptions.LaunchError: if the REST API layer does not become available in time.
    :return: A representation of the launched cluster.
    """
    hosts = self.cfg.opts("client", "hosts")
    client_options = self.cfg.opts("client", "options")
    es = self.client_factory(hosts, client_options).create()
    devices = [
        telemetry.ClusterMetaDataInfo(es),
        telemetry.ClusterEnvironmentInfo(es, self.metrics_store),
        telemetry.NodeStats(es, self.metrics_store),
        telemetry.IndexStats(es, self.metrics_store),
    ]
    cluster_telemetry = telemetry.Telemetry(devices=devices)
    # The list of nodes will be populated by ClusterMetaDataInfo, so no need to do it here
    launched_cluster = cluster.Cluster(hosts, [], cluster_telemetry)
    logger.info("All cluster nodes have successfully started. Checking if REST API is available.")
    if not wait_for_rest_layer(es, max_attempts=20):
        # Just stop the cluster here and raise. The caller is responsible for terminating individual nodes.
        logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
        self.stop(launched_cluster)
        raise exceptions.LaunchError("Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
    logger.info("REST API is available. Attaching telemetry devices to cluster.")
    cluster_telemetry.attach_to_cluster(launched_cluster)
    logger.info("Telemetry devices are now attached to the cluster.")
    return launched_cluster
def start(self, car=None, binary=None, data_paths=None):
    """Attach to an externally provisioned cluster.

    ``car``, ``binary`` and ``data_paths`` are accepted for interface compatibility with other
    launchers but are ignored here: the cluster lifecycle is managed outside of Rally.

    :return: A representation of the externally launched cluster.
    """
    console.println(ExternalLauncher.BOGUS_RESULTS_WARNING)
    hosts = self.cfg.opts("client", "hosts")
    client_options = self.cfg.opts("client", "options")
    es = self.client_factory(hosts, client_options).create()
    # cannot enable custom telemetry devices here
    attached_devices = [
        telemetry.ClusterMetaDataInfo(es),
        telemetry.ExternalEnvironmentInfo(es, self.metrics_store),
        telemetry.NodeStats(es, self.metrics_store),
        telemetry.IndexStats(es, self.metrics_store),
    ]
    cluster_telemetry = telemetry.Telemetry(devices=attached_devices)
    # cluster nodes will be populated by the external environment info telemetry device. We cannot know this upfront.
    es_cluster = cluster.Cluster(hosts, [], cluster_telemetry)
    user_defined_version = self.cfg.opts("mechanic", "distribution.version", mandatory=False)
    distribution_version = es.info()["version"]["number"]
    if not user_defined_version or user_defined_version.strip() == "":
        logger.info("Distribution version was not specified by user. Rally-determined version is [%s]" % distribution_version)
        self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)
    elif user_defined_version != distribution_version:
        console.warn(
            "Specified distribution version '%s' on the command line differs from version '%s' reported by the cluster." %
            (user_defined_version, distribution_version), logger=logger)
    cluster_telemetry.attach_to_cluster(es_cluster)
    return es_cluster
def start(self, car):
    """Start a locally provisioned cluster with one node process per node configured in ``car``.

    :param car: The car (node configuration blueprint) to launch.
    :return: A representation of the launched cluster with telemetry attached.
    """
    port = self.cfg.opts("provisioning", "node.http.port")
    hosts = [{"host": "localhost", "port": port}]
    client_options = self.cfg.opts("launcher", "client.options")
    # unified client config
    self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
    self.cfg.add(config.Scope.benchmark, "client", "options", client_options)
    es = client.EsClientFactory(hosts, client_options).create()
    # we're very specific which nodes we kill as there is potentially also an Elasticsearch based metrics store running on this machine
    node_prefix = self.cfg.opts("provisioning", "node.name.prefix")
    process.kill_running_es_instances(node_prefix)
    logger.info("Starting a cluster based on car [%s] with [%d] nodes." % (car, car.nodes))
    devices = [
        # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
        telemetry.MergeParts(self.cfg, self.metrics_store),
        telemetry.EnvironmentInfo(self.cfg, es, self.metrics_store),
        telemetry.NodeStats(self.cfg, es, self.metrics_store),
        telemetry.IndexStats(self.cfg, es, self.metrics_store),
        # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
        telemetry.IndexSize(self.cfg, self.metrics_store),
    ]
    cluster_telemetry = telemetry.Telemetry(self.cfg, devices=devices)
    started_nodes = []
    for node_id in range(car.nodes):
        started_nodes.append(self._start_node(node_id, car, es))
    es_cluster = cluster.Cluster(started_nodes, cluster_telemetry)
    cluster_telemetry.attach_to_cluster(es_cluster)
    return es_cluster
def start(self, car, binary, data_paths):
    """Start a Dockerized cluster via docker-compose and attach telemetry once the REST API is reachable.

    :param binary: Path to the docker-compose file describing the cluster.
    :raises exceptions.LaunchError: if the REST API layer does not become available.
    :return: A representation of the launched cluster.
    """
    self.binary_path = binary
    hosts = self.cfg.opts("client", "hosts")
    client_options = self.cfg.opts("client", "options")
    es = self.client_factory(hosts, client_options).create()
    # Cannot enable custom telemetry devices here
    docker_telemetry = telemetry.Telemetry(devices=[
        # Be aware that some of the meta-data are taken from the host system, not the container (e.g. number of CPU cores) so if the
        # Docker container constrains these, the metrics are actually wrong.
        telemetry.EnvironmentInfo(es, self.metrics_store),
        telemetry.NodeStats(es, self.metrics_store),
        telemetry.IndexStats(es, self.metrics_store),
        telemetry.DiskIo(self.metrics_store),
        telemetry.CpuUsage(self.metrics_store),
    ])
    es_cluster = cluster.Cluster(hosts, [], docker_telemetry)
    self._start_process(cmd="docker-compose -f %s up" % self.binary_path, node_name="rally0")
    logger.info("Docker container has successfully started. Checking if REST API is available.")
    if not wait_for_rest_layer(es):
        logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
        self.stop(es_cluster)
        raise exceptions.LaunchError("Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
    logger.info("REST API is available. Attaching telemetry devices to cluster.")
    docker_telemetry.attach_to_cluster(es_cluster)
    logger.info("Telemetry devices are now attached to the cluster.")
    return es_cluster
def start(self, car, binary, data_paths):
    """Start a locally provisioned cluster and attach telemetry once the REST API is reachable.

    :param car: The car (node configuration blueprint) to launch.
    :param binary: Path to the Elasticsearch binary to start.
    :param data_paths: Data directories, used by size-related telemetry devices.
    :raises exceptions.LaunchError: if the REST API layer does not become available.
    :return: A representation of the launched cluster.
    """
    hosts = self.cfg.opts("client", "hosts")
    client_options = self.cfg.opts("client", "options")
    es = client.EsClientFactory(hosts, client_options).create()
    # we're very specific which nodes we kill as there is potentially also an Elasticsearch based metrics store running on this machine
    node_prefix = self.cfg.opts("provisioning", "node.name.prefix")
    process.kill_running_es_instances(node_prefix)
    logger.info("Starting a cluster based on car [%s] with [%d] nodes." % (car, car.nodes))
    # TODO dm: Get rid of these...
    enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
    devices = [
        # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
        telemetry.MergeParts(self.metrics_store, self.node_log_dir),
        telemetry.EnvironmentInfo(es, self.metrics_store),
        telemetry.NodeStats(es, self.metrics_store),
        telemetry.IndexStats(es, self.metrics_store),
        # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
        telemetry.IndexSize(data_paths, self.metrics_store),
    ]
    cluster_telemetry = telemetry.Telemetry(enabled_devices, devices=devices)
    started_nodes = [self._start_node(node_id, car, es, binary) for node_id in range(car.nodes)]
    es_cluster = cluster.Cluster(hosts, started_nodes, cluster_telemetry)
    logger.info("All cluster nodes have successfully started. Checking if REST API is available.")
    if not wait_for_rest_layer(es):
        logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
        self.stop(es_cluster)
        raise exceptions.LaunchError("Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
    logger.info("REST API is available. Attaching telemetry devices to cluster.")
    cluster_telemetry.attach_to_cluster(es_cluster)
    logger.info("Telemetry devices are now attached to the cluster.")
    return es_cluster
def start(self, node_configurations=None):
    """Attach to an externally provisioned cluster and discover its nodes via telemetry.

    ``node_configurations`` is accepted for interface compatibility but ignored: the nodes
    are discovered by the attached telemetry devices, not provisioned by Rally.

    :return: The list of discovered cluster nodes.
    """
    hosts = self.cfg.opts("client", "hosts").default
    client_options = self.cfg.opts("client", "options").default
    es = self.client_factory(hosts, client_options).create()
    # cannot enable custom telemetry devices here
    discovery_telemetry = telemetry.Telemetry(devices=[
        # This is needed to actually populate the nodes
        telemetry.ClusterMetaDataInfo(es),
        # will gather node specific meta-data for all nodes
        telemetry.ExternalEnvironmentInfo(es, self.metrics_store),
    ])
    # We create a pseudo-cluster here to get information about all nodes.
    # cluster nodes will be populated by the external environment info telemetry device. We cannot know this upfront.
    pseudo_cluster = cluster.Cluster(hosts, [], discovery_telemetry)
    user_defined_version = self.cfg.opts("mechanic", "distribution.version", mandatory=False)
    distribution_version = es.info()["version"]["number"]
    if not user_defined_version or user_defined_version.strip() == "":
        self.logger.info("Distribution version was not specified by user. Rally-determined version is [%s]", distribution_version)
        self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)
    elif user_defined_version != distribution_version:
        console.warn(
            "Specified distribution version '%s' on the command line differs from version '%s' reported by the cluster." %
            (user_defined_version, distribution_version), logger=self.logger)
    discovery_telemetry.attach_to_cluster(pseudo_cluster)
    return pseudo_cluster.nodes
def test_stores_cluster_level_metrics_on_attach(self, metrics_store_add_meta_info):
    """EnvironmentInfo must store cluster-level meta-data plus per-node JVM and attribute meta-data.

    Uses an OrderedDict for the nodes so that the expected meta-info calls arrive in a
    deterministic node order (``assert_has_calls`` is order-sensitive).
    """
    nodes_info = {"nodes": collections.OrderedDict()}
    nodes_info["nodes"]["FCFjozkeTiOpN-SI88YEcg"] = {
        "name": "rally0",
        "host": "127.0.0.1",
        "attributes": {
            "group": "cold_nodes"
        },
        "os": {
            "name": "Mac OS X",
            "version": "10.11.4",
            "available_processors": 8
        },
        "jvm": {
            "version": "1.8.0_74",
            "vm_vendor": "Oracle Corporation"
        }
    }
    nodes_info["nodes"]["EEEjozkeTiOpN-SI88YEcg"] = {
        "name": "rally1",
        "host": "127.0.0.1",
        "attributes": {
            "group": "hot_nodes"
        },
        "os": {
            "name": "Mac OS X",
            "version": "10.11.5",
            "available_processors": 8
        },
        "jvm": {
            "version": "1.8.0_102",
            "vm_vendor": "Oracle Corporation"
        }
    }
    cluster_info = {
        "version": {
            "build_hash": "abc123",
            "number": "6.0.0-alpha1"
        }
    }
    client = Client(nodes=SubClient(info=nodes_info), info=cluster_info)
    metrics_store = metrics.EsMetricsStore(self.cfg)
    env_device = telemetry.EnvironmentInfo(client, metrics_store)
    t = telemetry.Telemetry(self.cfg, devices=[env_device])
    t.attach_to_cluster(cluster.Cluster([], [], t))

    calls = [
        mock.call(metrics.MetaInfoScope.cluster, None, "source_revision", "abc123"),
        mock.call(metrics.MetaInfoScope.cluster, None, "distribution_version", "6.0.0-alpha1"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor", "Oracle Corporation"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version", "1.8.0_74"),
        mock.call(metrics.MetaInfoScope.node, "rally1", "jvm_vendor", "Oracle Corporation"),
        mock.call(metrics.MetaInfoScope.node, "rally1", "jvm_version", "1.8.0_102"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "attribute_group", "cold_nodes"),
        mock.call(metrics.MetaInfoScope.node, "rally1", "attribute_group", "hot_nodes")
    ]
    metrics_store_add_meta_info.assert_has_calls(calls)
def test_stores_cluster_level_metrics_on_attach(self, metrics_store_add_meta_info):
    """ExternalEnvironmentInfo must store cluster meta-data and the full per-node meta-data.

    Node attributes shared by all nodes (here: ``az``) are additionally pushed up to cluster level.
    """
    nodes_stats = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
                "host": "127.0.0.1"
            }
        }
    }
    nodes_info = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
                "host": "127.0.0.1",
                "attributes": {
                    "az": "us_east1"
                },
                "os": {
                    "name": "Mac OS X",
                    "version": "10.11.4",
                    "available_processors": 8
                },
                "jvm": {
                    "version": "1.8.0_74",
                    "vm_vendor": "Oracle Corporation"
                }
            }
        }
    }
    cluster_info = {
        "version": {
            "build_hash": "253032b",
            "number": "5.0.0"
        }
    }
    client = Client(nodes=SubClient(stats=nodes_stats, info=nodes_info), info=cluster_info)
    metrics_store = metrics.EsMetricsStore(self.cfg)
    env_device = telemetry.ExternalEnvironmentInfo(client, metrics_store)
    t = telemetry.Telemetry(devices=[env_device])
    t.attach_to_cluster(cluster.Cluster([], [], t))

    calls = [
        mock.call(metrics.MetaInfoScope.cluster, None, "source_revision", "253032b"),
        mock.call(metrics.MetaInfoScope.cluster, None, "distribution_version", "5.0.0"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "node_name", "rally0"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "host_name", "127.0.0.1"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "os_name", "Mac OS X"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "os_version", "10.11.4"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "cpu_logical_cores", 8),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor", "Oracle Corporation"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version", "1.8.0_74"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "attribute_az", "us_east1"),
        mock.call(metrics.MetaInfoScope.cluster, None, "attribute_az", "us_east1")
    ]
    metrics_store_add_meta_info.assert_has_calls(calls)
def start(self):
    """
    Performs final startup tasks.

    Precondition: All cluster nodes have been started.
    Postcondition: The cluster is ready to receive HTTP requests or a ``LaunchError`` is raised.

    :return: A representation of the launched cluster.
    """
    enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
    telemetry_params = self.cfg.opts("mechanic", "telemetry.params")
    all_hosts = self.cfg.opts("client", "hosts").all_hosts
    default_hosts = self.cfg.opts("client", "hosts").default
    preserve = self.cfg.opts("mechanic", "preserve.install")
    skip_rest_api_check = self.cfg.opts("mechanic", "skip.rest.api.check")

    # one client per named cluster; multi-cluster telemetry devices receive the whole dict
    es = {}
    for cluster_name, cluster_hosts in all_hosts.items():
        all_client_options = self.cfg.opts("client", "options").all_client_options
        cluster_client_options = dict(all_client_options[cluster_name])
        # Use retries to avoid aborts on long living connections for telemetry devices
        cluster_client_options["retry-on-timeout"] = True
        es[cluster_name] = self.client_factory(cluster_hosts, cluster_client_options).create()

    es_default = es["default"]
    t = telemetry.Telemetry(enabled_devices, devices=[
        telemetry.NodeStats(telemetry_params, es, self.metrics_store),
        telemetry.ClusterMetaDataInfo(es_default),
        telemetry.ClusterEnvironmentInfo(es_default, self.metrics_store),
        telemetry.JvmStatsSummary(es_default, self.metrics_store),
        telemetry.IndexStats(es_default, self.metrics_store),
        telemetry.MlBucketProcessingTime(es_default, self.metrics_store),
        telemetry.CcrStats(telemetry_params, es, self.metrics_store),
        telemetry.RecoveryStats(telemetry_params, es, self.metrics_store)
    ])
    # The list of nodes will be populated by ClusterMetaDataInfo, so no need to do it here
    c = cluster.Cluster(default_hosts, [], t, preserve)
    if skip_rest_api_check:
        self.logger.info("Skipping REST API check and attaching telemetry devices to cluster.")
        t.attach_to_cluster(c)
        self.logger.info("Telemetry devices are now attached to the cluster.")
    else:
        self.logger.info("All cluster nodes have successfully started. Checking if REST API is available.")
        if wait_for_rest_layer(es_default, max_attempts=40):
            self.logger.info("REST API is available. Attaching telemetry devices to cluster.")
            t.attach_to_cluster(c)
            self.logger.info("Telemetry devices are now attached to the cluster.")
        else:
            # Just stop the cluster here and raise. The caller is responsible for terminating individual nodes.
            self.logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
            self.stop(c)
            raise exceptions.LaunchError(
                "Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
    return c
def test_fallback_when_host_not_available(self, metrics_store_add_meta_info):
    """When a node does not report its host, the host name meta-data must fall back to "unknown"."""
    nodes_stats = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
            }
        }
    }
    nodes_info = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
                "os": {
                    "name": "Mac OS X",
                    "version": "10.11.4",
                    "available_processors": 8
                },
                "jvm": {
                    "version": "1.8.0_74",
                    "vm_vendor": "Oracle Corporation"
                }
            }
        }
    }
    cluster_info = {"version": {"build_hash": "abc123"}}
    # NOTE(review): nodes_stats is passed as the *cluster* sub-client and nodes_info positionally to
    # the nodes sub-client — presumably matching this vintage of the Client/SubClient test doubles;
    # confirm against their constructors.
    client = Client(cluster=SubClient(nodes_stats), nodes=SubClient(nodes_info), info=cluster_info)
    metrics_store = metrics.EsMetricsStore(self.cfg)
    env_device = telemetry.ExternalEnvironmentInfo(self.cfg, client, metrics_store)
    t = telemetry.Telemetry(self.cfg, devices=[env_device])
    t.attach_to_cluster(cluster.Cluster([], t))

    calls = [
        mock.call(metrics.MetaInfoScope.cluster, None, "source_revision", "abc123"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "node_name", "rally0"),
        # no "host" reported above, hence the fallback value
        mock.call(metrics.MetaInfoScope.node, "rally0", "host_name", "unknown"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "os_name", "Mac OS X"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "os_version", "10.11.4"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "cpu_logical_cores", 8),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor", "Oracle Corporation"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version", "1.8.0_74")
    ]
    metrics_store_add_meta_info.assert_has_calls(calls)
def start(self, car=None):
    """Attach to an externally launched cluster (benchmark-only mode).

    ``car`` is accepted for interface compatibility but ignored.
    :return: A representation of the externally launched cluster.
    """
    console.println(ExternalLauncher.BOGUS_RESULTS_WARNING)
    hosts = self.cfg.opts("launcher", "external.target.hosts")
    client_options = self.cfg.opts("launcher", "client.options")
    # unified client config
    self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
    self.cfg.add(config.Scope.benchmark, "client", "options", client_options)
    es = self.client_factory(hosts, client_options).create()
    external_telemetry = telemetry.Telemetry(self.cfg, devices=[
        telemetry.ExternalEnvironmentInfo(self.cfg, es, self.metrics_store),
        telemetry.NodeStats(self.cfg, es, self.metrics_store),
        telemetry.IndexStats(self.cfg, es, self.metrics_store),
    ])
    es_cluster = cluster.Cluster([], external_telemetry)
    user_defined_version = self.cfg.opts("source", "distribution.version", mandatory=False)
    distribution_version = es.info()["version"]["number"]
    if not user_defined_version or user_defined_version.strip() == "":
        logger.info(
            "Distribution version was not specified by user. Rally-determined version is [%s]" % distribution_version)
        self.cfg.add(config.Scope.benchmark, "source", "distribution.version", distribution_version)
    elif user_defined_version != distribution_version:
        console.println(
            "Warning: Specified distribution version '%s' on the command line differs from version '%s' reported by the cluster." %
            (user_defined_version, distribution_version), logger=logger.warn)
    external_telemetry.attach_to_cluster(es_cluster)
    return es_cluster
def test_enriches_cluster_nodes_for_elasticsearch_1_x(self):
    """ClusterMetaDataInfo must enrich cluster- and node-level meta-data for Elasticsearch 1.x.

    In this fixture, memory and processor data are only present in the nodes *info* response,
    ``allocated_processors`` is absent, and no disk spin info is reported.
    """
    nodes_stats = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
                "host": "127.0.0.1",
                "fs": {
                    "data": [
                        {
                            "mount": "/usr/local/var/elasticsearch/data1",
                            "type": "hfs"
                        },
                        {
                            "mount": "/usr/local/var/elasticsearch/data2",
                            "type": "ntfs"
                        }
                    ]
                }
            }
        }
    }
    nodes_info = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
                "host": "127.0.0.1",
                "ip": "127.0.0.1",
                "os": {
                    "name": "Mac OS X",
                    "version": "10.11.4",
                    "available_processors": 8,
                    "mem": {
                        "total_in_bytes": 17179869184
                    }
                },
                "jvm": {
                    "version": "1.8.0_74",
                    "vm_vendor": "Oracle Corporation"
                }
            }
        }
    }
    cluster_info = {
        "version": {
            "build_hash": "c730b59357f8ebc555286794dcd90b3411f517c9",
            "number": "1.7.5"
        }
    }
    client = Client(nodes=SubClient(stats=nodes_stats, info=nodes_info), info=cluster_info)
    t = telemetry.Telemetry(devices=[telemetry.ClusterMetaDataInfo(client)])
    c = cluster.Cluster(hosts=[{"host": "localhost", "port": 39200}],
                        nodes=[cluster.Node(process=None, host_name="local", node_name="rally0", telemetry=None)],
                        telemetry=t)
    t.attach_to_cluster(c)

    self.assertEqual("1.7.5", c.distribution_version)
    self.assertEqual("c730b59357f8ebc555286794dcd90b3411f517c9", c.source_revision)
    self.assertEqual(1, len(c.nodes))
    n = c.nodes[0]
    self.assertEqual("127.0.0.1", n.ip)
    self.assertEqual("Mac OS X", n.os["name"])
    self.assertEqual("10.11.4", n.os["version"])
    self.assertEqual("Oracle Corporation", n.jvm["vendor"])
    self.assertEqual("1.8.0_74", n.jvm["version"])
    self.assertEqual(8, n.cpu["available_processors"])
    # not present on 1.x
    self.assertIsNone(n.cpu["allocated_processors"])
    self.assertEqual(17179869184, n.memory["total_bytes"])
    self.assertEqual(2, len(n.fs))
    self.assertEqual("/usr/local/var/elasticsearch/data1", n.fs[0]["mount"])
    self.assertEqual("hfs", n.fs[0]["type"])
    # spin info is not in the fixture, hence the fallback
    self.assertEqual("unknown", n.fs[0]["spins"])
    self.assertEqual("/usr/local/var/elasticsearch/data2", n.fs[1]["mount"])
    self.assertEqual("ntfs", n.fs[1]["type"])
    self.assertEqual("unknown", n.fs[1]["spins"])
def test_enriches_cluster_nodes_for_elasticsearch_after_1_x(self):
    """ClusterMetaDataInfo must enrich cluster- and node-level meta-data for Elasticsearch > 1.x.

    In this fixture, memory totals come from node *stats*, ``allocated_processors`` is present
    in node *info*, and installed plugin names are collected onto the node.
    """
    nodes_stats = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
                "host": "127.0.0.1",
                "os": {
                    "mem": {
                        "total_in_bytes": 17179869184
                    }
                },
                "fs": {
                    "data": [
                        {
                            "mount": "/usr/local/var/elasticsearch/data1",
                            "type": "hfs"
                        },
                        {
                            "mount": "/usr/local/var/elasticsearch/data2",
                            "type": "ntfs"
                        }
                    ]
                }
            }
        }
    }
    nodes_info = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
                "host": "127.0.0.1",
                "ip": "127.0.0.1",
                "os": {
                    "name": "Mac OS X",
                    "version": "10.11.4",
                    "available_processors": 8,
                    "allocated_processors": 4
                },
                "jvm": {
                    "version": "1.8.0_74",
                    "vm_vendor": "Oracle Corporation"
                },
                "plugins": [
                    {
                        "name": "analysis-icu",
                        "version": "5.0.0",
                        "description": "The ICU Analysis plugin integrates Lucene ICU module ...",
                        "classname": "org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin",
                        "has_native_controller": False
                    },
                    {
                        "name": "ingest-geoip",
                        "version": "5.0.0",
                        "description": "Ingest processor that uses looksup geo data ...",
                        "classname": "org.elasticsearch.ingest.geoip.IngestGeoIpPlugin",
                        "has_native_controller": False
                    },
                    {
                        "name": "ingest-user-agent",
                        "version": "5.0.0",
                        "description": "Ingest processor that extracts information from a user agent",
                        "classname": "org.elasticsearch.ingest.useragent.IngestUserAgentPlugin",
                        "has_native_controller": False
                    }
                ]
            }
        }
    }
    cluster_info = {
        "version": {
            "build_hash": "253032b",
            "number": "5.0.0"
        }
    }
    client = Client(nodes=SubClient(stats=nodes_stats, info=nodes_info), info=cluster_info)
    t = telemetry.Telemetry(devices=[telemetry.ClusterMetaDataInfo(client)])
    c = cluster.Cluster(hosts=[{"host": "localhost", "port": 39200}],
                        nodes=[cluster.Node(process=None, host_name="local", node_name="rally0", telemetry=None)],
                        telemetry=t)
    t.attach_to_cluster(c)

    self.assertEqual("5.0.0", c.distribution_version)
    self.assertEqual("253032b", c.source_revision)
    self.assertEqual(1, len(c.nodes))
    n = c.nodes[0]
    self.assertEqual("127.0.0.1", n.ip)
    self.assertEqual("Mac OS X", n.os["name"])
    self.assertEqual("10.11.4", n.os["version"])
    self.assertEqual("Oracle Corporation", n.jvm["vendor"])
    self.assertEqual("1.8.0_74", n.jvm["version"])
    self.assertEqual(8, n.cpu["available_processors"])
    self.assertEqual(4, n.cpu["allocated_processors"])
    self.assertEqual(17179869184, n.memory["total_bytes"])
    self.assertEqual(2, len(n.fs))
    self.assertEqual("/usr/local/var/elasticsearch/data1", n.fs[0]["mount"])
    self.assertEqual("hfs", n.fs[0]["type"])
    # spin info is not in the fixture, hence the fallback
    self.assertEqual("unknown", n.fs[0]["spins"])
    self.assertEqual("/usr/local/var/elasticsearch/data2", n.fs[1]["mount"])
    self.assertEqual("ntfs", n.fs[1]["type"])
    self.assertEqual("unknown", n.fs[1]["spins"])
    self.assertEqual(["analysis-icu", "ingest-geoip", "ingest-user-agent"], n.plugins)
def test_stores_all_node_metrics_on_attach(self, metrics_store_add_meta_info):
    """ExternalEnvironmentInfo must store all per-node meta-data and push shared values to cluster level.

    Plugins and node attributes are stored per node; because the (single) node covers all
    nodes, they are additionally recorded at cluster scope.
    """
    nodes_stats = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
                "host": "127.0.0.1"
            }
        }
    }
    nodes_info = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
                "host": "127.0.0.1",
                "attributes": {
                    "az": "us_east1"
                },
                "os": {
                    "name": "Mac OS X",
                    "version": "10.11.4",
                    "available_processors": 8
                },
                "jvm": {
                    "version": "1.8.0_74",
                    "vm_vendor": "Oracle Corporation"
                },
                "plugins": [
                    {
                        "name": "ingest-geoip",
                        "version": "5.0.0",
                        "description": "Ingest processor that uses looksup geo data ...",
                        "classname": "org.elasticsearch.ingest.geoip.IngestGeoIpPlugin",
                        "has_native_controller": False
                    }
                ]
            }
        }
    }
    cluster_info = {
        "version": {
            "build_hash": "253032b",
            "number": "5.0.0"
        }
    }
    client = Client(nodes=SubClient(stats=nodes_stats, info=nodes_info), info=cluster_info)
    metrics_store = metrics.EsMetricsStore(self.cfg)
    env_device = telemetry.ExternalEnvironmentInfo(client, metrics_store)
    t = telemetry.Telemetry(devices=[env_device])
    t.attach_to_cluster(cluster.Cluster([], [], t))

    calls = [
        mock.call(metrics.MetaInfoScope.node, "rally0", "node_name", "rally0"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "host_name", "127.0.0.1"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "os_name", "Mac OS X"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "os_version", "10.11.4"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "cpu_logical_cores", 8),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor", "Oracle Corporation"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version", "1.8.0_74"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "plugins", ["ingest-geoip"]),
        # these are automatically pushed up to cluster level (additionally) if all nodes match
        mock.call(metrics.MetaInfoScope.cluster, None, "plugins", ["ingest-geoip"]),
        mock.call(metrics.MetaInfoScope.node, "rally0", "attribute_az", "us_east1"),
        mock.call(metrics.MetaInfoScope.cluster, None, "attribute_az", "us_east1"),
    ]
    metrics_store_add_meta_info.assert_has_calls(calls)
def test_stores_cluster_level_metrics_on_attach(self, metrics_store_add_meta_info):
    """ClusterEnvironmentInfo must store cluster meta-data plus per-node JVM, plugin and attribute meta-data.

    Uses an OrderedDict for the nodes so that the expected meta-info calls arrive in a
    deterministic node order. Plugins are identical on both nodes and are therefore also
    recorded at cluster scope; the ``group`` attribute differs per node and stays node-scoped.
    """
    nodes_info = {"nodes": collections.OrderedDict()}
    nodes_info["nodes"]["FCFjozkeTiOpN-SI88YEcg"] = {
        "name": "rally0",
        "host": "127.0.0.1",
        "attributes": {
            "group": "cold_nodes"
        },
        "os": {
            "name": "Mac OS X",
            "version": "10.11.4",
            "available_processors": 8
        },
        "jvm": {
            "version": "1.8.0_74",
            "vm_vendor": "Oracle Corporation"
        },
        "plugins": [
            {
                "name": "ingest-geoip",
                "version": "5.0.0",
                "description": "Ingest processor that uses looksup geo data ...",
                "classname": "org.elasticsearch.ingest.geoip.IngestGeoIpPlugin",
                "has_native_controller": False
            }
        ]
    }
    nodes_info["nodes"]["EEEjozkeTiOpN-SI88YEcg"] = {
        "name": "rally1",
        "host": "127.0.0.1",
        "attributes": {
            "group": "hot_nodes"
        },
        "os": {
            "name": "Mac OS X",
            "version": "10.11.5",
            "available_processors": 8
        },
        "jvm": {
            "version": "1.8.0_102",
            "vm_vendor": "Oracle Corporation"
        },
        "plugins": [
            {
                "name": "ingest-geoip",
                "version": "5.0.0",
                "description": "Ingest processor that uses looksup geo data ...",
                "classname": "org.elasticsearch.ingest.geoip.IngestGeoIpPlugin",
                "has_native_controller": False
            }
        ]
    }
    cluster_info = {
        "version": {
            "build_hash": "abc123",
            "number": "6.0.0-alpha1"
        }
    }
    cfg = create_config()
    client = Client(nodes=SubClient(info=nodes_info), info=cluster_info)
    metrics_store = metrics.EsMetricsStore(cfg)
    env_device = telemetry.ClusterEnvironmentInfo(client, metrics_store)
    t = telemetry.Telemetry(cfg, devices=[env_device])
    t.attach_to_cluster(cluster.Cluster([], [], t))

    calls = [
        mock.call(metrics.MetaInfoScope.cluster, None, "source_revision", "abc123"),
        mock.call(metrics.MetaInfoScope.cluster, None, "distribution_version", "6.0.0-alpha1"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor", "Oracle Corporation"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version", "1.8.0_74"),
        mock.call(metrics.MetaInfoScope.node, "rally1", "jvm_vendor", "Oracle Corporation"),
        mock.call(metrics.MetaInfoScope.node, "rally1", "jvm_version", "1.8.0_102"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "plugins", ["ingest-geoip"]),
        mock.call(metrics.MetaInfoScope.node, "rally1", "plugins", ["ingest-geoip"]),
        # can push up to cluster level as all nodes have the same plugins installed
        mock.call(metrics.MetaInfoScope.cluster, None, "plugins", ["ingest-geoip"]),
        mock.call(metrics.MetaInfoScope.node, "rally0", "attribute_group", "cold_nodes"),
        mock.call(metrics.MetaInfoScope.node, "rally1", "attribute_group", "hot_nodes"),
    ]
    metrics_store_add_meta_info.assert_has_calls(calls)
def start(self, car):
    """Render a docker-compose configuration from the car, start the container and attach telemetry.

    :param car: The car (node configuration blueprint) providing heap size and JVM options.
    :return: A representation of the launched cluster.
    """
    # hardcoded for the moment, should actually be identical to internal launcher
    # Only needed on Mac:
    # hosts = [{"host": process.run_subprocess_with_output("docker-machine ip default")[0].strip(), "port": 9200}]
    hosts = [{"host": "localhost", "port": 9200}]
    client_options = self.cfg.opts("launcher", "client.options")
    # unified client config
    self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
    self.cfg.add(config.Scope.benchmark, "client", "options", client_options)
    es = self.client_factory(hosts, client_options).create()
    t = telemetry.Telemetry(self.cfg, devices=[
        # Be aware that some of the meta-data are taken from the host system, not the container (e.g. number of CPU cores) so if the
        # Docker container constrains these, the metrics are actually wrong.
        telemetry.EnvironmentInfo(self.cfg, es, self.metrics_store),
        telemetry.NodeStats(self.cfg, es, self.metrics_store),
        telemetry.IndexStats(self.cfg, es, self.metrics_store),
        telemetry.DiskIo(self.cfg, self.metrics_store),
        telemetry.CpuUsage(self.cfg, self.metrics_store)
    ])
    distribution_version = self.cfg.opts("source", "distribution.version", mandatory=False)
    install_dir = self._install_dir()
    io.ensure_dir(install_dir)
    java_opts = ""
    if car.heap:
        java_opts += "-Xms%s -Xmx%s " % (car.heap, car.heap)
    if car.java_opts:
        java_opts += car.java_opts
    # FIX: previously named ``vars`` which shadows the builtin ``vars()``
    variables = {
        "es_java_opts": java_opts,
        # give the container half of the host's physical memory
        "container_memory_gb": "%dg" % (convert.bytes_to_gb(psutil.virtual_memory().total) // 2),
        "es_data_dir": "%s/data" % install_dir,
        "es_version": distribution_version
    }
    docker_cfg = self._render_template_from_file(variables)
    logger.info("Starting Docker container with configuration:\n%s" % docker_cfg)

    docker_cfg_path = self._docker_cfg_path()
    with open(docker_cfg_path, "wt") as f:
        f.write(docker_cfg)

    c = cluster.Cluster([], t)
    self._start_process(cmd="docker-compose -f %s up" % docker_cfg_path, node_name="rally0")
    # Wait for a little while: Plugins may still be initializing although the node has already started.
    time.sleep(10)
    t.attach_to_cluster(c)
    logger.info("Successfully started Docker container")
    return c
def test_store_results(self):
    """Storing race results must emit one result document per metric via the bulk API.

    Expected: one doc each for old_gc_time, the index operation's throughput, and
    young_gc_time, all enriched with race-level meta-data (in alphabetical metric order).
    """
    # here we need the real thing
    from esrally import reporter
    from esrally.mechanic import cluster

    schedule = [track.Task(track.Operation("index", track.OperationType.Index))]
    t = track.Track(name="unittest-track",
                    short_description="unittest track",
                    source_root_url="http://example.org",
                    indices=[track.Index(name="tests", auto_managed=True,
                                         types=[track.Type(name="test-type", mapping_file=None)])],
                    challenges=[track.Challenge(name="index", description="Index", default=True,
                                                index_settings=None, schedule=schedule)])
    c = cluster.Cluster([], [], None)
    c.distribution_version = "5.0.0"
    node = c.add_node("localhost", "rally-node-0")
    node.plugins.append("x-pack")
    race = metrics.Race(
        rally_version="0.4.4",
        environment_name="unittest",
        trial_timestamp=EsResultsStoreTests.TRIAL_TIMESTAMP,
        pipeline="from-sources",
        user_tag="let-me-test",
        track=t,
        challenge=t.default_challenge,
        car="4gheap",
        total_laps=12,
        cluster=c,
        lap_results=[],
        results=reporter.Stats({
            "young_gc_time": 100,
            "old_gc_time": 5,
            "op_metrics": [
                {
                    "operation": "index",
                    "throughput": {
                        "min": 1000,
                        "median": 1250,
                        "max": 1500,
                        "unit": "docs/s"
                    }
                }
            ]
        }))
    self.race_store.store_results(race)

    expected_docs = [
        {
            "environment": "unittest",
            "trial-timestamp": "20160131T000000Z",
            "distribution-version": "5.0.0",
            "distribution-major-version": 5,
            "user-tag": "let-me-test",
            "track": "unittest-track",
            "challenge": "index",
            "car": "4gheap",
            "node-count": 1,
            "plugins": ["x-pack"],
            "active": True,
            "name": "old_gc_time",
            "value": {
                "single": 5
            }
        },
        {
            "environment": "unittest",
            "trial-timestamp": "20160131T000000Z",
            "distribution-version": "5.0.0",
            "distribution-major-version": 5,
            "user-tag": "let-me-test",
            "track": "unittest-track",
            "challenge": "index",
            "car": "4gheap",
            "node-count": 1,
            "plugins": ["x-pack"],
            "active": True,
            "name": "throughput",
            "operation": "index",
            "value": {
                "min": 1000,
                "median": 1250,
                "max": 1500,
                "unit": "docs/s"
            }
        },
        {
            "environment": "unittest",
            "trial-timestamp": "20160131T000000Z",
            "distribution-version": "5.0.0",
            "distribution-major-version": 5,
            "user-tag": "let-me-test",
            "track": "unittest-track",
            "challenge": "index",
            "car": "4gheap",
            "node-count": 1,
            "plugins": ["x-pack"],
            "active": True,
            "name": "young_gc_time",
            "value": {
                "single": 100
            }
        }
    ]
    self.es_mock.bulk_index.assert_called_with(index="rally-results-2016-01", doc_type="results", items=expected_docs)
def test_store_results(self):
    """Storing race results must emit one result document per metric via the bulk API.

    Expected: one doc each for old_gc_time, the node-level startup_time, the task's
    throughput, and young_gc_time, all enriched with race-level meta-data.
    """
    # here we need the real thing
    from esrally import reporter
    from esrally.mechanic import cluster

    schedule = [track.Task("index #1", track.Operation("index", track.OperationType.Bulk))]
    t = track.Track(name="unittest-track",
                    indices=[track.Index(name="tests", types=["test-type"])],
                    challenges=[track.Challenge(name="index", default=True, schedule=schedule)])
    c = cluster.Cluster([], [], None)
    c.distribution_version = "5.0.0"
    node = c.add_node("localhost", "rally-node-0")
    node.plugins.append("x-pack")
    race = metrics.Race(
        rally_version="0.4.4",
        environment_name="unittest",
        trial_id=EsResultsStoreTests.TRIAL_ID,
        trial_timestamp=EsResultsStoreTests.TRIAL_TIMESTAMP,
        pipeline="from-sources",
        user_tags={"os": "Linux"},
        track=t,
        track_params=None,
        challenge=t.default_challenge,
        car="4gheap",
        car_params=None,
        plugin_params={"some-param": True},
        total_laps=12,
        cluster=c,
        lap_results=[],
        results=reporter.Stats(
            {
                "young_gc_time": 100,
                "old_gc_time": 5,
                "op_metrics": [
                    {
                        "task": "index #1",
                        "operation": "index",
                        "throughput": {
                            "min": 1000,
                            "median": 1250,
                            "max": 1500,
                            "unit": "docs/s"
                        }
                    }
                ],
                "node_metrics": [
                    {
                        "node": "rally-node-0",
                        "startup_time": 3.4
                    }
                ]
            })
    )
    self.race_store.store_results(race)

    expected_docs = [
        {
            "rally-version": "0.4.4",
            "environment": "unittest",
            "trial-id": EsResultsStoreTests.TRIAL_ID,
            "trial-timestamp": "20160131T000000Z",
            "distribution-version": "5.0.0",
            "distribution-major-version": 5,
            "user-tags": {
                "os": "Linux"
            },
            "track": "unittest-track",
            "challenge": "index",
            "car": "4gheap",
            "plugin-params": {
                "some-param": True
            },
            "node-count": 1,
            "plugins": ["x-pack"],
            "active": True,
            "name": "old_gc_time",
            "value": {
                "single": 5
            }
        },
        {
            "rally-version": "0.4.4",
            "environment": "unittest",
            "trial-id": EsResultsStoreTests.TRIAL_ID,
            "trial-timestamp": "20160131T000000Z",
            "distribution-version": "5.0.0",
            "distribution-major-version": 5,
            "user-tags": {
                "os": "Linux"
            },
            "track": "unittest-track",
            "challenge": "index",
            "car": "4gheap",
            "plugin-params": {
                "some-param": True
            },
            "node-count": 1,
            "plugins": ["x-pack"],
            "active": True,
            "node": "rally-node-0",
            "name": "startup_time",
            "value": {
                "single": 3.4
            },
        },
        {
            "rally-version": "0.4.4",
            "environment": "unittest",
            "trial-id": EsResultsStoreTests.TRIAL_ID,
            "trial-timestamp": "20160131T000000Z",
            "distribution-version": "5.0.0",
            "distribution-major-version": 5,
            "user-tags": {
                "os": "Linux"
            },
            "track": "unittest-track",
            "challenge": "index",
            "car": "4gheap",
            "plugin-params": {
                "some-param": True
            },
            "node-count": 1,
            "plugins": ["x-pack"],
            "active": True,
            "name": "throughput",
            "task": "index #1",
            "operation": "index",
            "value": {
                "min": 1000,
                "median": 1250,
                "max": 1500,
                "unit": "docs/s"
            }
        },
        {
            "rally-version": "0.4.4",
            "environment": "unittest",
            "trial-id": EsResultsStoreTests.TRIAL_ID,
            "trial-timestamp": "20160131T000000Z",
            "distribution-version": "5.0.0",
            "distribution-major-version": 5,
            "user-tags": {
                "os": "Linux"
            },
            "track": "unittest-track",
            "challenge": "index",
            "car": "4gheap",
            "plugin-params": {
                "some-param": True
            },
            "node-count": 1,
            "plugins": ["x-pack"],
            "active": True,
            "name": "young_gc_time",
            "value": {
                "single": 100
            }
        }
    ]
    self.es_mock.bulk_index.assert_called_with(index="rally-results-2016-01", doc_type="results", items=expected_docs)