Example #1
    def start(self, car=None, binary=None, data_paths=None):
        console.println(ExternalLauncher.BOGUS_RESULTS_WARNING)
        hosts = self.cfg.opts("client", "hosts")
        client_options = self.cfg.opts("client", "options")
        es = self.client_factory(hosts, client_options).create()

        # cannot enable custom telemetry devices here
        t = telemetry.Telemetry(devices=[
            telemetry.ClusterMetaDataInfo(es),
            telemetry.ExternalEnvironmentInfo(es, self.metrics_store),
            telemetry.NodeStats(es, self.metrics_store),
            telemetry.IndexStats(es, self.metrics_store)
        ])
        # cluster nodes will be populated by the external environment info telemetry device. We cannot know this upfront.
        c = cluster.Cluster(hosts, [], t)
        user_defined_version = self.cfg.opts("mechanic", "distribution.version", mandatory=False)
        distribution_version = es.info()["version"]["number"]
        if not user_defined_version or user_defined_version.strip() == "":
            logger.info("Distribution version was not specified by user. Rally-determined version is [%s]" % distribution_version)
            self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)
        elif user_defined_version != distribution_version:
            console.warn(
                "Specified distribution version '%s' on the command line differs from version '%s' reported by the cluster." %
                (user_defined_version, distribution_version), logger=logger)
        t.attach_to_cluster(c)
        return c
Example #2
    def _start_node(self, node_configuration, node_count_on_host):
        host_name = node_configuration.ip
        node_name = node_configuration.node_name
        car = node_configuration.car
        binary_path = node_configuration.binary_path
        data_paths = node_configuration.data_paths
        node_telemetry_dir = os.path.join(node_configuration.node_root_path, "telemetry")

        java_major_version, java_home = java_resolver.java_home(car, self.cfg)

        self.logger.info("Starting node [%s] based on car [%s].", node_name, car)

        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
        telemetry_params = self.cfg.opts("mechanic", "telemetry.params")
        node_telemetry = [
            telemetry.FlightRecorder(telemetry_params, node_telemetry_dir, java_major_version),
            telemetry.JitCompiler(node_telemetry_dir),
            telemetry.Gc(node_telemetry_dir, java_major_version),
            telemetry.DiskIo(self.metrics_store, node_count_on_host, node_telemetry_dir, node_name),
            telemetry.NodeEnvironmentInfo(self.metrics_store),
            telemetry.IndexSize(data_paths, self.metrics_store),
            telemetry.StartupTime(self.metrics_store),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)
        env = self._prepare_env(car, node_name, java_home, t)
        t.on_pre_node_start(node_name)
        node_pid = self._start_process(binary_path, env)
        node = cluster.Node(node_pid, host_name, node_name, t)
        self.logger.info("Attaching telemetry devices to node [%s].", node_name)
        t.attach_to_node(node)

        return node
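
A hedged counterpart sketch: shutdown would typically detach the same telemetry object again, mirroring the detach_from_node(node, running=...) calls used in the test examples further down. The helper name _stop_node and the omitted process-termination step are assumptions, not part of the example above.

    def _stop_node(self, node):
        # assumed counterpart to _start_node: detach telemetry while the node
        # is still running so devices can collect their final samples ...
        node.telemetry.detach_from_node(node, running=True)
        # ... terminate the node's process here (details omitted) ...
        # ... then detach again once the process has stopped
        node.telemetry.detach_from_node(node, running=False)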
Example #3
    def test_merges_options_set_by_different_devices(self):
        cfg = config.Config()
        cfg.add(config.Scope.application, "mechanic", "telemetry.devices",
                "jfr")
        cfg.add(config.Scope.application, "system", "challenge.root.dir",
                "challenge-root")
        cfg.add(config.Scope.application, "benchmarks", "metrics.log.dir",
                "telemetry")

        devices = [
            MockTelemetryDevice({"ES_JAVA_OPTS": "-Xms256M"}),
            MockTelemetryDevice({"ES_JAVA_OPTS": "-Xmx512M"}),
            MockTelemetryDevice({"ES_NET_HOST": "127.0.0.1"})
        ]

        t = telemetry.Telemetry(enabled_devices=None, devices=devices)

        default_car = team.Car(name="default-car",
                               config_paths=["/tmp/rally-config"])
        opts = t.instrument_candidate_env(default_car, "default-node")

        self.assertTrue(opts)
        self.assertEqual(len(opts), 2)
        self.assertEqual("-Xms256M -Xmx512M", opts["ES_JAVA_OPTS"])
        self.assertEqual("127.0.0.1", opts["ES_NET_HOST"])
Example #4
    def start(self):
        hosts = self.cfg.opts("client", "hosts")
        client_options = self.cfg.opts("client", "options")
        es = self.client_factory(hosts, client_options).create()

        t = telemetry.Telemetry(devices=[
            telemetry.ClusterMetaDataInfo(es),
            telemetry.ClusterEnvironmentInfo(es, self.metrics_store),
            telemetry.NodeStats(es, self.metrics_store),
            telemetry.IndexStats(es, self.metrics_store)
        ])

        # The list of nodes will be populated by ClusterMetaDataInfo, so no need to do it here
        c = cluster.Cluster(hosts, [], t)
        logger.info(
            "All cluster nodes have successfully started. Checking if REST API is available."
        )
        if wait_for_rest_layer(es, max_attempts=20):
            logger.info(
                "REST API is available. Attaching telemetry devices to cluster."
            )
            t.attach_to_cluster(c)
            logger.info("Telemetry devices are now attached to the cluster.")
        else:
            # Just stop the cluster here and raise. The caller is responsible for terminating individual nodes.
            logger.error(
                "REST API layer is not yet available. Forcefully terminating cluster."
            )
            self.stop(c)
            raise exceptions.LaunchError(
                "Elasticsearch REST API layer is not available. Forcefully terminated cluster."
            )

        return c
Example #5
    def start(self, node_configurations=None):
        hosts = self.cfg.opts("client", "hosts")
        client_options = self.cfg.opts("client", "options")
        es = self.client_factory(hosts, client_options).create()

        # cannot enable custom telemetry devices here
        t = telemetry.Telemetry(devices=[
            # This is needed to actually populate the nodes
            telemetry.ClusterMetaDataInfo(es),
            # will gather node-specific meta-data for all nodes
            telemetry.ExternalEnvironmentInfo(es, self.metrics_store),
        ])
        # We create a pseudo-cluster here to get information about all nodes.
        # cluster nodes will be populated by the external environment info telemetry device. We cannot know this upfront.
        c = cluster.Cluster(hosts, [], t)
        user_defined_version = self.cfg.opts("mechanic",
                                             "distribution.version",
                                             mandatory=False)
        distribution_version = es.info()["version"]["number"]
        if not user_defined_version or user_defined_version.strip() == "":
            logger.info(
                "Distribution version was not specified by user. Rally-determined version is [%s]"
                % distribution_version)
            self.cfg.add(config.Scope.benchmark, "mechanic",
                         "distribution.version", distribution_version)
        elif user_defined_version != distribution_version:
            console.warn(
                "Specified distribution version '%s' on the command line differs from version '%s' reported by the cluster."
                % (user_defined_version, distribution_version),
                logger=logger)
        t.attach_to_cluster(c)
        return c.nodes
Example #6
    def test_merges_options_set_by_different_devices(self):
        cfg = config.Config()
        cfg.add(config.Scope.application, "telemetry", "devices", "jfr")
        cfg.add(config.Scope.application, "system", "challenge.root.dir",
                "challenge-root")
        cfg.add(config.Scope.application, "benchmarks", "metrics.log.dir",
                "telemetry")

        # we don't need one for this test
        metrics_store = None

        devices = [
            MockTelemetryDevice(cfg, metrics_store,
                                {"ES_JAVA_OPTS": "-Xms256M"}),
            MockTelemetryDevice(cfg, metrics_store,
                                {"ES_JAVA_OPTS": "-Xmx512M"}),
            MockTelemetryDevice(cfg, metrics_store,
                                {"ES_NET_HOST": "127.0.0.1"})
        ]

        t = telemetry.Telemetry(cfg=cfg, devices=devices)

        default_car = car.Car(name="default-car")
        opts = t.instrument_candidate_env(default_car, "default-node")

        self.assertTrue(opts)
        self.assertEqual(len(opts), 2)
        self.assertEqual("-Xms256M -Xmx512M", opts["ES_JAVA_OPTS"])
        self.assertEqual("127.0.0.1", opts["ES_NET_HOST"])
Example #7
    def start(self, car, binary, data_paths):
        self.binary_path = binary

        hosts = self.cfg.opts("client", "hosts")
        client_options = self.cfg.opts("client", "options")
        es = self.client_factory(hosts, client_options).create()

        # Cannot enable custom telemetry devices here
        t = telemetry.Telemetry(devices=[
            # Be aware that some of the meta-data is taken from the host system, not the container (e.g. the number of CPU cores), so if the
            # Docker container constrains these, the metrics are actually wrong.
            telemetry.EnvironmentInfo(es, self.metrics_store),
            telemetry.NodeStats(es, self.metrics_store),
            telemetry.IndexStats(es, self.metrics_store),
            telemetry.DiskIo(self.metrics_store),
            telemetry.CpuUsage(self.metrics_store)
        ])

        c = cluster.Cluster(hosts, [], t)
        self._start_process(cmd="docker-compose -f %s up" % self.binary_path, node_name="rally0")
        logger.info("Docker container has successfully started. Checking if REST API is available.")
        if wait_for_rest_layer(es):
            logger.info("REST API is available. Attaching telemetry devices to cluster.")
            t.attach_to_cluster(c)
            logger.info("Telemetry devices are now attached to the cluster.")
        else:
            logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
            self.stop(c)
            raise exceptions.LaunchError("Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
        return c
Example #8
    def start(self, node_configurations=None):
        hosts = self.cfg.opts("client", "hosts").default
        client_options = self.cfg.opts("client", "options").default
        es = self.client_factory(hosts, client_options).create()

        # cannot enable custom telemetry devices here
        t = telemetry.Telemetry(devices=[
            # This is needed to actually populate the nodes
            telemetry.ClusterMetaDataInfo(es),
            # will gather node-specific meta-data for all nodes
            telemetry.ExternalEnvironmentInfo(es, self.metrics_store),
        ])
        # We create a pseudo-cluster here to get information about all nodes.
        # cluster nodes will be populated by the external environment info telemetry device. We cannot know this
        # upfront.
        c = cluster.Cluster(hosts, [], t)
        user_defined_version = self.cfg.opts("mechanic", "distribution.version", mandatory=False)
        # noinspection PyBroadException
        try:
            distribution_version = es.info()["version"]["number"]
        except BaseException:
            self.logger.exception("Could not retrieve cluster distribution version")
            distribution_version = None
        if not user_defined_version or user_defined_version.strip() == "":
            self.logger.info("Distribution version was not specified by user. Rally-determined version is [%s]",
                             distribution_version)
            self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)
        elif user_defined_version != distribution_version:
            self.logger.warning("Distribution version '%s' on command line differs from actual cluster version '%s'.",
                                user_defined_version, distribution_version)
        t.attach_to_cluster(c)
        return c.nodes
Example #9
    def _start_node(self, node, car, es, binary_path):
        node_name = self._node_name(node)
        host_name = socket.gethostname()

        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")

        node_telemetry = [
            telemetry.FlightRecorder(self.node_telemetry_dir),
            telemetry.JitCompiler(self.node_telemetry_dir),
            telemetry.Gc(self.node_telemetry_dir),
            telemetry.PerfStat(self.node_telemetry_dir),
            telemetry.DiskIo(self.metrics_store),
            telemetry.CpuUsage(self.metrics_store),
            telemetry.EnvironmentInfo(es, self.metrics_store),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)

        env = self._prepare_env(car, node_name, t)
        cmd = self.prepare_cmd(car, node_name)
        process = self._start_process(cmd, env, node_name, binary_path)
        node = cluster.Node(process, host_name, node_name, t)
        logger.info("Cluster node [%s] has successfully started. Attaching telemetry devices to node." % node_name)
        t.attach_to_node(node)
        logger.info("Telemetry devices are now attached to node [%s]." % node_name)

        return node
Example #10
    def start(self, car, binary, data_paths):
        hosts = self.cfg.opts("client", "hosts")
        client_options = self.cfg.opts("client", "options")
        es = client.EsClientFactory(hosts, client_options).create()

        # we're very specific about which nodes we kill, as there is potentially also an Elasticsearch-based metrics store running on this machine
        node_prefix = self.cfg.opts("provisioning", "node.name.prefix")
        process.kill_running_es_instances(node_prefix)

        logger.info("Starting a cluster based on car [%s] with [%d] nodes." % (car, car.nodes))

        # TODO dm: Get rid of these...
        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")

        cluster_telemetry = [
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.MergeParts(self.metrics_store, self.node_log_dir),
            telemetry.EnvironmentInfo(es, self.metrics_store),
            telemetry.NodeStats(es, self.metrics_store),
            telemetry.IndexStats(es, self.metrics_store),
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.IndexSize(data_paths, self.metrics_store)
        ]
        t = telemetry.Telemetry(enabled_devices, devices=cluster_telemetry)
        c = cluster.Cluster(hosts, [self._start_node(node, car, es, binary) for node in range(car.nodes)], t)
        logger.info("All cluster nodes have successfully started. Checking if REST API is available.")
        if wait_for_rest_layer(es):
            logger.info("REST API is available. Attaching telemetry devices to cluster.")
            t.attach_to_cluster(c)
            logger.info("Telemetry devices are now attached to the cluster.")
        else:
            logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
            self.stop(c)
            raise exceptions.LaunchError("Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
        return c
Example #11
    def test_stores_index_size_for_data_paths(self, run_subprocess,
                                              metrics_store_node_count,
                                              get_size):
        get_size.side_effect = [2048, 16384]

        cfg = create_config()
        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.IndexSize(
            ["/var/elasticsearch/data/1", "/var/elasticsearch/data/2"],
            metrics_store)
        t = telemetry.Telemetry(enabled_devices=[], devices=[device])
        node = cluster.Node(process=None,
                            host_name="localhost",
                            node_name="rally-node-0",
                            telemetry=t)
        t.attach_to_node(node)
        t.on_benchmark_start()
        t.on_benchmark_stop()
        t.detach_from_node(node, running=True)
        t.detach_from_node(node, running=False)

        metrics_store_node_count.assert_has_calls([
            mock.call("rally-node-0", "final_index_size_bytes", 18432, "byte")
        ])

        run_subprocess.assert_has_calls([
            mock.call("find /var/elasticsearch/data/1 -ls",
                      header="index files:"),
            mock.call("find /var/elasticsearch/data/2 -ls",
                      header="index files:")
        ])
Example #12
    def _start_node(self, node_configuration, node_count_on_host, java_major_version):
        host_name = node_configuration.ip
        node_name = node_configuration.node_name
        car = node_configuration.car
        binary_path = node_configuration.binary_path
        data_paths = node_configuration.data_paths
        node_telemetry_dir = "%s/telemetry" % node_configuration.node_root_path

        logger.info("Starting node [%s] based on car [%s]." % (node_name, car))

        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
        node_telemetry = [
            telemetry.FlightRecorder(node_telemetry_dir, java_major_version),
            telemetry.JitCompiler(node_telemetry_dir),
            telemetry.Gc(node_telemetry_dir, java_major_version),
            telemetry.PerfStat(node_telemetry_dir),
            telemetry.DiskIo(self.metrics_store, node_count_on_host),
            telemetry.CpuUsage(self.metrics_store),
            telemetry.NodeEnvironmentInfo(self.metrics_store),
            telemetry.IndexSize(data_paths, self.metrics_store),
            telemetry.MergeParts(self.metrics_store, node_configuration.log_path),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)

        env = self._prepare_env(car, node_name, t)
        node_process = self._start_process(env, node_name, binary_path)
        node = cluster.Node(node_process, host_name, node_name, t)
        logger.info("Node [%s] has successfully started. Attaching telemetry devices." % node_name)
        t.attach_to_node(node)
        logger.info("Telemetry devices are now attached to node [%s]." % node_name)

        return node
Example #13
    def test_stores_cluster_level_metrics_on_attach(self, metrics_store_add_meta_info):
        nodes_stats = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                    "host": "127.0.0.1"
                }
            }
        }

        nodes_info = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                    "host": "127.0.0.1",
                    "attributes": {
                        "az": "us_east1"
                    },
                    "os": {
                        "name": "Mac OS X",
                        "version": "10.11.4",
                        "available_processors": 8
                    },
                    "jvm": {
                        "version": "1.8.0_74",
                        "vm_vendor": "Oracle Corporation"
                    }
                }
            }
        }
        cluster_info = {
            "version": {
                "build_hash": "253032b",
                "number": "5.0.0"
            }
        }
        client = Client(nodes=SubClient(stats=nodes_stats, info=nodes_info), info=cluster_info)
        metrics_store = metrics.EsMetricsStore(self.cfg)
        env_device = telemetry.ExternalEnvironmentInfo(client, metrics_store)
        t = telemetry.Telemetry(devices=[env_device])
        t.attach_to_cluster(cluster.Cluster([], [], t))

        calls = [
            mock.call(metrics.MetaInfoScope.cluster, None, "source_revision", "253032b"),
            mock.call(metrics.MetaInfoScope.cluster, None, "distribution_version", "5.0.0"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "node_name", "rally0"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "host_name", "127.0.0.1"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "os_name", "Mac OS X"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "os_version", "10.11.4"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "cpu_logical_cores", 8),
            mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor", "Oracle Corporation"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version", "1.8.0_74"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "attribute_az", "us_east1"),
            mock.call(metrics.MetaInfoScope.cluster, None, "attribute_az", "us_east1")
        ]
        metrics_store_add_meta_info.assert_has_calls(calls)
Example #14
    def start(self):
        """
        Performs final startup tasks.

        Precondition: All cluster nodes have been started.
        Postcondition: The cluster is ready to receive HTTP requests or a ``LaunchError`` is raised.

        :return: A representation of the launched cluster.
        """
        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
        telemetry_params = self.cfg.opts("mechanic", "telemetry.params")
        all_hosts = self.cfg.opts("client", "hosts").all_hosts
        default_hosts = self.cfg.opts("client", "hosts").default
        preserve = self.cfg.opts("mechanic", "preserve.install")
        skip_rest_api_check = self.cfg.opts("mechanic", "skip.rest.api.check")

        es = {}
        for cluster_name, cluster_hosts in all_hosts.items():
            all_client_options = self.cfg.opts("client", "options").all_client_options
            cluster_client_options = dict(all_client_options[cluster_name])
            # Use retries to avoid aborts on long-lived connections for telemetry devices
            cluster_client_options["retry-on-timeout"] = True
            es[cluster_name] = self.client_factory(cluster_hosts, cluster_client_options).create()

        es_default = es["default"]

        t = telemetry.Telemetry(enabled_devices, devices=[
            telemetry.NodeStats(telemetry_params, es, self.metrics_store),
            telemetry.ClusterMetaDataInfo(es_default),
            telemetry.ClusterEnvironmentInfo(es_default, self.metrics_store),
            telemetry.JvmStatsSummary(es_default, self.metrics_store),
            telemetry.IndexStats(es_default, self.metrics_store),
            telemetry.MlBucketProcessingTime(es_default, self.metrics_store),
            telemetry.CcrStats(telemetry_params, es, self.metrics_store),
            telemetry.RecoveryStats(telemetry_params, es, self.metrics_store)
        ])

        # The list of nodes will be populated by ClusterMetaDataInfo, so no need to do it here
        c = cluster.Cluster(default_hosts, [], t, preserve)

        if skip_rest_api_check:
            self.logger.info("Skipping REST API check and attaching telemetry devices to cluster.")
            t.attach_to_cluster(c)
            self.logger.info("Telemetry devices are now attached to the cluster.")
        else:
            self.logger.info("All cluster nodes have successfully started. Checking if REST API is available.")
            if wait_for_rest_layer(es_default, max_attempts=40):
                self.logger.info("REST API is available. Attaching telemetry devices to cluster.")
                t.attach_to_cluster(c)
                self.logger.info("Telemetry devices are now attached to the cluster.")
            else:
                # Just stop the cluster here and raise. The caller is responsible for terminating individual nodes.
                self.logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
                self.stop(c)
                raise exceptions.LaunchError(
                    "Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
        return c
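
A hedged sketch of how a caller might drive this start() method. The enclosing class name ClusterLauncher and the previously prepared cfg and metrics_store are assumptions; only the start()/stop() calls are taken from the example above.

    # assumed setup: cfg and metrics_store have been created elsewhere
    cluster_launcher = ClusterLauncher(cfg, metrics_store)
    c = cluster_launcher.start()
    try:
        pass  # run the benchmark against cluster `c`
    finally:
        cluster_launcher.stop(c)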
Example #15
    def test_fallback_when_host_not_available(self,
                                              metrics_store_add_meta_info):
        nodes_stats = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                }
            }
        }

        nodes_info = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                    "os": {
                        "name": "Mac OS X",
                        "version": "10.11.4",
                        "available_processors": 8
                    },
                    "jvm": {
                        "version": "1.8.0_74",
                        "vm_vendor": "Oracle Corporation"
                    }
                }
            }
        }
        cluster_info = {"version": {"build_hash": "abc123"}}
        client = Client(cluster=SubClient(nodes_stats),
                        nodes=SubClient(nodes_info),
                        info=cluster_info)
        metrics_store = metrics.EsMetricsStore(self.cfg)
        env_device = telemetry.ExternalEnvironmentInfo(self.cfg, client,
                                                       metrics_store)
        t = telemetry.Telemetry(self.cfg, devices=[env_device])
        t.attach_to_cluster(cluster.Cluster([], t))

        calls = [
            mock.call(metrics.MetaInfoScope.cluster, None, "source_revision",
                      "abc123"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "node_name",
                      "rally0"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "host_name",
                      "unknown"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "os_name",
                      "Mac OS X"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "os_version",
                      "10.11.4"),
            mock.call(metrics.MetaInfoScope.node, "rally0",
                      "cpu_logical_cores", 8),
            mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor",
                      "Oracle Corporation"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version",
                      "1.8.0_74")
        ]
        metrics_store_add_meta_info.assert_has_calls(calls)
Example #16
    def _start_node(self, host, node, es):
        node_name = self._node_name(node)
        p = self._start_process(cmd="docker-compose -f %s up" % self.binary_path, node_name=node_name)
        # only support a subset of telemetry for Docker hosts (specifically, we do not allow users to enable any devices)
        node_telemetry = [
            telemetry.DiskIo(self.metrics_store),
            telemetry.CpuUsage(self.metrics_store),
            telemetry.EnvironmentInfo(es, self.metrics_store)
        ]
        t = telemetry.Telemetry(devices=node_telemetry)
        return cluster.Node(p, host["host"], node_name, t)
Example #17
    def test_stores_available_index_stats(self, metrics_store_cluster_count,
                                          metrics_store_cluster_value):
        indices_stats = {
            "_all": {
                "primaries": {
                    "segments": {
                        "count": 5,
                        "memory_in_bytes": 2048,
                        "stored_fields_memory_in_bytes": 1024,
                        "doc_values_memory_in_bytes": 128,
                        "terms_memory_in_bytes": 256,
                        "points_memory_in_bytes": 512
                    },
                    "merges": {
                        "total_time_in_millis": 300,
                        "total_throttled_time_in_millis": 120
                    },
                    "indexing": {
                        "index_time_in_millis": 2000
                    },
                    "refresh": {
                        "total_time_in_millis": 200
                    },
                    "flush": {
                        "total_time_in_millis": 100
                    }
                }
            }
        }

        client = Client(indices=SubClient(indices_stats))
        cfg = create_config()

        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.IndexStats(cfg, client, metrics_store)
        t = telemetry.Telemetry(cfg, devices=[device])
        t.on_benchmark_start()
        t.on_benchmark_stop()

        metrics_store_cluster_count.assert_has_calls(
            [mock.call("segments_count", 5)])
        metrics_store_cluster_value.assert_has_calls([
            mock.call("segments_memory_in_bytes", 2048, "byte"),
            mock.call("segments_doc_values_memory_in_bytes", 128, "byte"),
            mock.call("segments_stored_fields_memory_in_bytes", 1024, "byte"),
            mock.call("segments_terms_memory_in_bytes", 256, "byte"),
            # we don't have norms, so nothing should have been called
            mock.call("segments_points_memory_in_bytes", 512, "byte"),
            mock.call("merges_total_time", 300, "ms"),
            mock.call("merges_total_throttled_time", 120, "ms"),
            mock.call("indexing_total_time", 2000, "ms"),
            mock.call("refresh_total_time", 200, "ms"),
            mock.call("flush_total_time", 100, "ms"),
        ])
Example #18
    def test_stores_nothing_if_no_data_path(self, run_subprocess, metrics_store_cluster_count, get_size):
        get_size.return_value = 2048

        cfg = create_config()

        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.IndexSize(data_paths=[], metrics_store=metrics_store)
        t = telemetry.Telemetry(devices=[device])
        t.attach_to_cluster(None)
        t.on_benchmark_start()
        t.on_benchmark_stop()
        t.detach_from_cluster(None)

        run_subprocess.assert_not_called()
        metrics_store_cluster_count.assert_not_called()
        get_size.assert_not_called()
Example #19
    def start(self, node_configurations):
        nodes = []
        for node_configuration in node_configurations:
            node_name = node_configuration.node_name
            host_name = node_configuration.ip
            binary_path = node_configuration.binary_path
            self.binary_paths[node_name] = binary_path
            self._start_process(binary_path)
            # only support a subset of telemetry for Docker hosts
            # (specifically, we do not allow users to enable any devices)
            node_telemetry = [
                telemetry.DiskIo(self.metrics_store, len(node_configurations)),
                telemetry.NodeEnvironmentInfo(self.metrics_store)
            ]
            t = telemetry.Telemetry(devices=node_telemetry)
            nodes.append(cluster.Node(0, host_name, node_name, t))
        return nodes
Example #20
    def test_stores_nothing_if_no_data_path(self, run_subprocess, metrics_store_cluster_count, get_size):
        get_size.return_value = 2048

        cfg = create_config()

        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.IndexSize(data_paths=[], metrics_store=metrics_store)
        t = telemetry.Telemetry(devices=[device])
        node = cluster.Node(process=None, host_name="localhost", node_name="rally-node-0", telemetry=t)
        t.attach_to_node(node)
        t.on_benchmark_start()
        t.on_benchmark_stop()
        t.detach_from_node(node, running=True)
        t.detach_from_node(node, running=False)

        run_subprocess.assert_not_called()
        metrics_store_cluster_count.assert_not_called()
        get_size.assert_not_called()
Example #21
    def test_stores_index_size_for_data_path(self, run_subprocess, metrics_store_cluster_count, get_size):
        get_size.return_value = 2048

        cfg = create_config()
        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.IndexSize(["/var/elasticsearch/data"], metrics_store)
        t = telemetry.Telemetry(enabled_devices=[], devices=[device])
        t.attach_to_cluster(None)
        t.on_benchmark_start()
        t.on_benchmark_stop()
        t.detach_from_cluster(None)

        metrics_store_cluster_count.assert_has_calls([
            mock.call("final_index_size_bytes", 2048, "byte")
        ])

        run_subprocess.assert_has_calls([
            mock.call("find /var/elasticsearch/data -ls", header="index files:")
        ])
Example #22
    def test_env_options_order(self):
        cfg = config.Config()
        cfg.add(config.Scope.application, "mechanic", "keep.running", False)
        cfg.add(config.Scope.application, "system", "env.name", "test")

        ms = get_metrics_store(cfg)
        proc_launcher = launcher.ProcessLauncher(cfg, ms, races_root_dir="/home")
        default_car = team.Car(names="default-car", root_path=None, config_paths=["/tmp/rally-config"])
        
        node_telemetry = [
            telemetry.FlightRecorder(telemetry_params={}, log_root="/tmp/telemetry", java_major_version=8)
            ]
        t = telemetry.Telemetry(["jfr"], devices=node_telemetry)
        env = proc_launcher._prepare_env(car=default_car, node_name="node0", java_home="/java_home", t=t)

        self.assertEqual("/java_home/bin" + os.pathsep + os.environ["PATH"], env["PATH"])
        self.assertEqual("-XX:+ExitOnOutOfMemoryError -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints " 
                         "-XX:+UnlockCommercialFeatures -XX:+FlightRecorder "
                         "-XX:FlightRecorderOptions=disk=true,maxage=0s,maxsize=0,dumponexit=true,dumponexitpath=/tmp/telemetry/default-car-node0.jfr "
                         "-XX:StartFlightRecording=defaultrecording=true", env["ES_JAVA_OPTS"])
Example #23
    def start(self, node_configurations):
        nodes = []
        for node_configuration in node_configurations:
            node_name = node_configuration.node_name
            host_name = node_configuration.ip
            binary_path = node_configuration.binary_path
            node_telemetry_dir = os.path.join(node_configuration.node_root_path, "telemetry")
            self.binary_paths[node_name] = binary_path
            self._start_process(binary_path)
            # only support a subset of telemetry for Docker hosts
            # (specifically, we do not allow users to enable any devices)
            node_telemetry = [
                telemetry.DiskIo(self.metrics_store, len(node_configurations), node_telemetry_dir, node_name),
            ]
            t = telemetry.Telemetry(devices=node_telemetry)
            telemetry.add_metadata_for_node(self.metrics_store, node_name, host_name)
            nodes.append(cluster.Node(0, host_name, node_name, t))
        return nodes
Example #24
def local_provisioner(cfg, car, plugins, cluster_settings, all_node_ips,
                      target_root, node_id):
    distribution_version = cfg.opts("mechanic",
                                    "distribution.version",
                                    mandatory=False)
    ip = cfg.opts("provisioning", "node.ip")
    http_port = cfg.opts("provisioning", "node.http.port")
    node_name_prefix = cfg.opts("provisioning", "node.name.prefix")
    preserve = cfg.opts("mechanic", "preserve.install")

    node_name = "%s-%d" % (node_name_prefix, node_id)
    node_root_dir = "%s/%s" % (target_root, node_name)

    node_telemetry_dir = os.path.join(node_root_dir, "telemetry")
    java_major_version, java_home = java_resolver.java_home(car, cfg)
    enabled_devices = cfg.opts("mechanic", "telemetry.devices")
    telemetry_params = cfg.opts("mechanic", "telemetry.params")
    node_telemetry = [
        telemetry.FlightRecorder(telemetry_params, node_telemetry_dir,
                                 java_major_version),
        telemetry.JitCompiler(node_telemetry_dir),
        telemetry.Gc(node_telemetry_dir, java_major_version)
    ]
    t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)

    es_installer = ElasticsearchInstaller(car, java_home, node_name,
                                          node_root_dir, all_node_ips, ip,
                                          http_port)
    plugin_installers = [
        PluginInstaller(plugin, java_home) for plugin in plugins
    ]

    return BareProvisioner(cluster_settings,
                           es_installer,
                           plugin_installers,
                           preserve,
                           t,
                           distribution_version=distribution_version)
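
A hedged usage sketch for the factory function above. The concrete cfg, car and plugin objects are assumptions and would normally come from Rally's config and team loading code; the argument names follow the signature shown.

    # hypothetical call site; cfg and car are assumed to be prepared elsewhere
    provisioner = local_provisioner(cfg, car, plugins=[], cluster_settings={},
                                    all_node_ips=["127.0.0.1"],
                                    target_root="/tmp/rally-provision", node_id=0)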
Example #25
    def start(self, car=None):
        console.println(ExternalLauncher.BOGUS_RESULTS_WARNING)

        hosts = self.cfg.opts("launcher", "external.target.hosts")
        client_options = self.cfg.opts("launcher", "client.options")
        # unified client config
        self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
        self.cfg.add(config.Scope.benchmark, "client", "options",
                     client_options)

        es = self.client_factory(hosts, client_options).create()

        t = telemetry.Telemetry(
            self.cfg,
            devices=[
                telemetry.ExternalEnvironmentInfo(self.cfg, es,
                                                  self.metrics_store),
                telemetry.NodeStats(self.cfg, es, self.metrics_store),
                telemetry.IndexStats(self.cfg, es, self.metrics_store)
            ])
        c = cluster.Cluster([], t)
        user_defined_version = self.cfg.opts("source",
                                             "distribution.version",
                                             mandatory=False)
        distribution_version = es.info()["version"]["number"]
        if not user_defined_version or user_defined_version.strip() == "":
            logger.info(
                "Distribution version was not specified by user. Rally-determined version is [%s]"
                % distribution_version)
            self.cfg.add(config.Scope.benchmark, "source",
                         "distribution.version", distribution_version)
        elif user_defined_version != distribution_version:
            console.println(
                "Warning: Specified distribution version '%s' on the command line differs from version '%s' reported by the cluster."
                % (user_defined_version, distribution_version),
                logger=logger.warn)
        t.attach_to_cluster(c)
        return c
Example #26
    def _start_node(self, node, car, es):
        node_name = self._node_name(node)
        host_name = socket.gethostname()

        node_telemetry = [
            telemetry.FlightRecorder(self.cfg, self.metrics_store),
            telemetry.JitCompiler(self.cfg, self.metrics_store),
            telemetry.Gc(self.cfg, self.metrics_store),
            telemetry.PerfStat(self.cfg, self.metrics_store),
            telemetry.DiskIo(self.cfg, self.metrics_store),
            telemetry.CpuUsage(self.cfg, self.metrics_store),
            telemetry.EnvironmentInfo(self.cfg, es, self.metrics_store),
        ]

        t = telemetry.Telemetry(self.cfg, devices=node_telemetry)

        env = self._prepare_env(car, node_name, t)
        cmd = self.prepare_cmd(car, node_name)
        process = self._start_process(cmd, env, node_name)
        node = cluster.Node(process, host_name, node_name, t)
        t.attach_to_node(node)

        return node
Example #27
    def test_index_stats_are_per_lap(self, metrics_store_cluster_count,
                                     metrics_store_cluster_value):
        client = Client(indices=SubClient({
            "_all": {
                "primaries": {
                    "segments": {
                        "count": 0
                    },
                    "merges": {
                        "total_time_in_millis": 0,
                        "total_throttled_time_in_millis": 0
                    },
                    "indexing": {
                        "index_time_in_millis": 0
                    },
                    "refresh": {
                        "total_time_in_millis": 0
                    },
                    "flush": {
                        "total_time_in_millis": 0
                    }
                }
            }
        }))
        cfg = create_config()

        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.IndexStats(client, metrics_store)
        t = telemetry.Telemetry(cfg, devices=[device])
        # lap 1
        t.on_benchmark_start()

        client.indices = SubClient({
            "_all": {
                "primaries": {
                    "segments": {
                        "count": 5,
                        "memory_in_bytes": 2048,
                        "stored_fields_memory_in_bytes": 1024,
                        "doc_values_memory_in_bytes": 128,
                        "terms_memory_in_bytes": 256
                    },
                    "merges": {
                        "total_time_in_millis": 300,
                        "total_throttled_time_in_millis": 120
                    },
                    "indexing": {
                        "index_time_in_millis": 2000
                    },
                    "refresh": {
                        "total_time_in_millis": 200
                    },
                    "flush": {
                        "total_time_in_millis": 100
                    }
                }
            }
        })

        t.on_benchmark_stop()
        # lap 2
        t.on_benchmark_start()

        client.indices = SubClient({
            "_all": {
                "primaries": {
                    "segments": {
                        "count": 7,
                        "memory_in_bytes": 2048,
                        "stored_fields_memory_in_bytes": 1024,
                        "doc_values_memory_in_bytes": 128,
                        "terms_memory_in_bytes": 256
                    },
                    "merges": {
                        "total_time_in_millis": 900,
                        "total_throttled_time_in_millis": 120
                    },
                    "indexing": {
                        "index_time_in_millis": 8000
                    },
                    "refresh": {
                        "total_time_in_millis": 500
                    },
                    "flush": {
                        "total_time_in_millis": 300
                    }
                }
            }
        })

        t.on_benchmark_stop()

        metrics_store_cluster_value.assert_has_calls(
            [
                # 1st lap
                mock.call("segments_memory_in_bytes", 2048, "byte"),
                mock.call("merges_total_time", 300, "ms"),
                mock.call("merges_total_throttled_time", 120, "ms"),
                mock.call("indexing_total_time", 2000, "ms"),
                mock.call("refresh_total_time", 200, "ms"),
                mock.call("flush_total_time", 100, "ms"),
                mock.call("segments_doc_values_memory_in_bytes", 128, "byte"),
                mock.call("segments_stored_fields_memory_in_bytes", 1024,
                          "byte"),
                mock.call("segments_terms_memory_in_bytes", 256, "byte"),
                # we don't have norms or points, so nothing should have been called

                # 2nd lap
                mock.call("segments_memory_in_bytes", 2048, "byte"),
                mock.call("merges_total_time", 900, "ms"),
                mock.call("merges_total_throttled_time", 120, "ms"),
                mock.call("indexing_total_time", 8000, "ms"),
                mock.call("refresh_total_time", 500, "ms"),
                mock.call("flush_total_time", 300, "ms"),
                mock.call("segments_doc_values_memory_in_bytes", 128, "byte"),
                mock.call("segments_stored_fields_memory_in_bytes", 1024,
                          "byte"),
                mock.call("segments_terms_memory_in_bytes", 256, "byte"),
            ],
            any_order=True)
Example #28
    def test_stores_only_diff_of_gc_times(self, metrics_store_node_level,
                                          metrics_store_cluster_level):
        nodes_stats_at_start = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                    "host": "127.0.0.1",
                    "jvm": {
                        "gc": {
                            "collectors": {
                                "old": {
                                    "collection_time_in_millis": 1000
                                },
                                "young": {
                                    "collection_time_in_millis": 500
                                }
                            }
                        }
                    }
                }
            }
        }

        client = Client(nodes=SubClient(nodes_stats_at_start))
        cfg = create_config()

        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.NodeStats(client, metrics_store)
        t = telemetry.Telemetry(cfg, devices=[device])
        t.on_benchmark_start()
        # now we'd need to change the node stats response
        nodes_stats_at_end = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                    "host": "127.0.0.1",
                    "jvm": {
                        "gc": {
                            "collectors": {
                                "old": {
                                    "collection_time_in_millis": 2500
                                },
                                "young": {
                                    "collection_time_in_millis": 1200
                                }
                            }
                        }
                    }
                }
            }
        }
        client.nodes = SubClient(nodes_stats_at_end)
        t.on_benchmark_stop()

        metrics_store_node_level.assert_has_calls([
            mock.call("rally0", "node_young_gen_gc_time", 700, "ms"),
            mock.call("rally0", "node_old_gen_gc_time", 1500, "ms")
        ])

        metrics_store_cluster_level.assert_has_calls([
            mock.call("node_total_young_gen_gc_time", 700, "ms"),
            mock.call("node_total_old_gen_gc_time", 1500, "ms")
        ])
Example #29
    def test_enriches_cluster_nodes_for_elasticsearch_1_x(self):
        nodes_stats = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                    "host": "127.0.0.1",
                    "fs": {
                        "data": [{
                            "mount": "/usr/local/var/elasticsearch/data1",
                            "type": "hfs"
                        }, {
                            "mount": "/usr/local/var/elasticsearch/data2",
                            "type": "ntfs"
                        }]
                    }
                }
            }
        }

        nodes_info = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                    "host": "127.0.0.1",
                    "ip": "127.0.0.1",
                    "os": {
                        "name": "Mac OS X",
                        "version": "10.11.4",
                        "available_processors": 8,
                        "mem": {
                            "total_in_bytes": 17179869184
                        }
                    },
                    "jvm": {
                        "version": "1.8.0_74",
                        "vm_vendor": "Oracle Corporation"
                    }
                }
            }
        }
        cluster_info = {
            "version": {
                "build_hash": "c730b59357f8ebc555286794dcd90b3411f517c9",
                "number": "1.7.5"
            }
        }
        client = Client(nodes=SubClient(stats=nodes_stats, info=nodes_info),
                        info=cluster_info)

        t = telemetry.Telemetry(
            devices=[telemetry.ClusterMetaDataInfo(client)])

        c = cluster.Cluster(hosts=[{
            "host": "localhost",
            "port": 39200
        }],
                            nodes=[
                                cluster.Node(process=None,
                                             host_name="local",
                                             node_name="rally0",
                                             telemetry=None)
                            ],
                            telemetry=t)

        t.attach_to_cluster(c)

        self.assertEqual("1.7.5", c.distribution_version)
        self.assertEqual("c730b59357f8ebc555286794dcd90b3411f517c9",
                         c.source_revision)
        self.assertEqual(1, len(c.nodes))
        n = c.nodes[0]
        self.assertEqual("127.0.0.1", n.ip)
        self.assertEqual("Mac OS X", n.os["name"])
        self.assertEqual("10.11.4", n.os["version"])
        self.assertEqual("Oracle Corporation", n.jvm["vendor"])
        self.assertEqual("1.8.0_74", n.jvm["version"])
        self.assertEqual(8, n.cpu["available_processors"])
        self.assertIsNone(n.cpu["allocated_processors"])
        self.assertEqual(17179869184, n.memory["total_bytes"])

        self.assertEqual(2, len(n.fs))
        self.assertEqual("/usr/local/var/elasticsearch/data1",
                         n.fs[0]["mount"])
        self.assertEqual("hfs", n.fs[0]["type"])
        self.assertEqual("unknown", n.fs[0]["spins"])
        self.assertEqual("/usr/local/var/elasticsearch/data2",
                         n.fs[1]["mount"])
        self.assertEqual("ntfs", n.fs[1]["type"])
        self.assertEqual("unknown", n.fs[1]["spins"])
Example #30
    def test_enriches_cluster_nodes_for_elasticsearch_after_1_x(self):
        nodes_stats = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                    "host": "127.0.0.1",
                    "os": {
                        "mem": {
                            "total_in_bytes": 17179869184
                        }
                    },
                    "fs": {
                        "data": [{
                            "mount": "/usr/local/var/elasticsearch/data1",
                            "type": "hfs"
                        }, {
                            "mount": "/usr/local/var/elasticsearch/data2",
                            "type": "ntfs"
                        }]
                    }
                }
            }
        }

        nodes_info = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                    "host": "127.0.0.1",
                    "ip": "127.0.0.1",
                    "os": {
                        "name": "Mac OS X",
                        "version": "10.11.4",
                        "available_processors": 8,
                        "allocated_processors": 4
                    },
                    "jvm": {
                        "version": "1.8.0_74",
                        "vm_vendor": "Oracle Corporation"
                    },
                    "plugins": [{
                        "name": "analysis-icu",
                        "version": "5.0.0",
                        "description": "The ICU Analysis plugin integrates Lucene ICU module ...",
                        "classname": "org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin",
                        "has_native_controller": False
                    }, {
                        "name": "ingest-geoip",
                        "version": "5.0.0",
                        "description": "Ingest processor that uses looksup geo data ...",
                        "classname": "org.elasticsearch.ingest.geoip.IngestGeoIpPlugin",
                        "has_native_controller": False
                    }, {
                        "name": "ingest-user-agent",
                        "version": "5.0.0",
                        "description": "Ingest processor that extracts information from a user agent",
                        "classname": "org.elasticsearch.ingest.useragent.IngestUserAgentPlugin",
                        "has_native_controller": False
                    }]
                }
            }
        }
        cluster_info = {
            "version": {
                "build_hash": "253032b",
                "number": "5.0.0"
            }
        }
        client = Client(nodes=SubClient(stats=nodes_stats, info=nodes_info),
                        info=cluster_info)

        t = telemetry.Telemetry(
            devices=[telemetry.ClusterMetaDataInfo(client)])

        c = cluster.Cluster(hosts=[{
            "host": "localhost",
            "port": 39200
        }],
                            nodes=[
                                cluster.Node(process=None,
                                             host_name="local",
                                             node_name="rally0",
                                             telemetry=None)
                            ],
                            telemetry=t)

        t.attach_to_cluster(c)

        self.assertEqual("5.0.0", c.distribution_version)
        self.assertEqual("253032b", c.source_revision)
        self.assertEqual(1, len(c.nodes))
        n = c.nodes[0]
        self.assertEqual("127.0.0.1", n.ip)
        self.assertEqual("Mac OS X", n.os["name"])
        self.assertEqual("10.11.4", n.os["version"])
        self.assertEqual("Oracle Corporation", n.jvm["vendor"])
        self.assertEqual("1.8.0_74", n.jvm["version"])
        self.assertEqual(8, n.cpu["available_processors"])
        self.assertEqual(4, n.cpu["allocated_processors"])
        self.assertEqual(17179869184, n.memory["total_bytes"])

        self.assertEqual(2, len(n.fs))
        self.assertEqual("/usr/local/var/elasticsearch/data1",
                         n.fs[0]["mount"])
        self.assertEqual("hfs", n.fs[0]["type"])
        self.assertEqual("unknown", n.fs[0]["spins"])
        self.assertEqual("/usr/local/var/elasticsearch/data2",
                         n.fs[1]["mount"])
        self.assertEqual("ntfs", n.fs[1]["type"])
        self.assertEqual("unknown", n.fs[1]["spins"])
        self.assertEqual(["analysis-icu", "ingest-geoip", "ingest-user-agent"],
                         n.plugins)