示例#1
0
    def start(self, car):
        port = self.cfg.opts("provisioning", "node.http.port")
        hosts = [{"host": "localhost", "port": port}]
        client_options = self.cfg.opts("launcher", "client.options")
        # unified client config
        self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
        self.cfg.add(config.Scope.benchmark, "client", "options",
                     client_options)

        es = client.EsClientFactory(hosts, client_options).create()

        # we're very specific which nodes we kill as there is potentially also an Elasticsearch based metrics store running on this machine
        node_prefix = self.cfg.opts("provisioning", "node.name.prefix")
        process.kill_running_es_instances(node_prefix)

        logger.info("Starting a cluster based on car [%s] with [%d] nodes." %
                    (car, car.nodes))

        cluster_telemetry = [
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.MergeParts(self.cfg, self.metrics_store),
            telemetry.EnvironmentInfo(self.cfg, es, self.metrics_store),
            telemetry.NodeStats(self.cfg, es, self.metrics_store),
            telemetry.IndexStats(self.cfg, es, self.metrics_store),
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.IndexSize(self.cfg, self.metrics_store)
        ]

        t = telemetry.Telemetry(self.cfg, devices=cluster_telemetry)
        c = cluster.Cluster(
            [self._start_node(node, car, es) for node in range(car.nodes)], t)
        t.attach_to_cluster(c)
        return c
示例#2
0
    def test_stores_cluster_level_metrics_on_attach(self, metrics_store_add_meta_info):
        nodes_info = {"nodes": collections.OrderedDict()}
        nodes_info["nodes"]["FCFjozkeTiOpN-SI88YEcg"] = {
            "name": "rally0",
            "host": "127.0.0.1",
            "attributes": {
                "group": "cold_nodes"
            },
            "os": {
                "name": "Mac OS X",
                "version": "10.11.4",
                "available_processors": 8
            },
            "jvm": {
                "version": "1.8.0_74",
                "vm_vendor": "Oracle Corporation"
            }
        }
        nodes_info["nodes"]["EEEjozkeTiOpN-SI88YEcg"] = {
            "name": "rally1",
            "host": "127.0.0.1",
            "attributes": {
                "group": "hot_nodes"
            },
            "os": {
                "name": "Mac OS X",
                "version": "10.11.5",
                "available_processors": 8
            },
            "jvm": {
                "version": "1.8.0_102",
                "vm_vendor": "Oracle Corporation"
            }
        }

        cluster_info = {
            "version":
                {
                    "build_hash": "abc123",
                    "number": "6.0.0-alpha1"
                }
        }

        client = Client(nodes=SubClient(info=nodes_info), info=cluster_info)
        metrics_store = metrics.EsMetricsStore(self.cfg)
        env_device = telemetry.EnvironmentInfo(client, metrics_store)
        t = telemetry.Telemetry(self.cfg, devices=[env_device])
        t.attach_to_cluster(cluster.Cluster([], [], t))
        calls = [
            mock.call(metrics.MetaInfoScope.cluster, None, "source_revision", "abc123"),
            mock.call(metrics.MetaInfoScope.cluster, None, "distribution_version", "6.0.0-alpha1"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor", "Oracle Corporation"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version", "1.8.0_74"),
            mock.call(metrics.MetaInfoScope.node, "rally1", "jvm_vendor", "Oracle Corporation"),
            mock.call(metrics.MetaInfoScope.node, "rally1", "jvm_version", "1.8.0_102"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "attribute_group", "cold_nodes"),
            mock.call(metrics.MetaInfoScope.node, "rally1", "attribute_group", "hot_nodes")
        ]

        metrics_store_add_meta_info.assert_has_calls(calls)
示例#3
0
    def test_stores_node_level_metrics_on_attach(self, cpu_model,
                                                 physical_cpu_cores,
                                                 logical_cpu_cores, os_version,
                                                 os_name,
                                                 metrics_store_add_meta_info):
        cpu_model.return_value = "Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz"
        physical_cpu_cores.return_value = 4
        logical_cpu_cores.return_value = 8
        os_version.return_value = "4.2.0-18-generic"
        os_name.return_value = "Linux"

        metrics_store = metrics.EsMetricsStore(self.cfg)
        node = cluster.Node(None, "io", "rally0", None)
        env_device = telemetry.EnvironmentInfo(self.cfg, None, metrics_store)
        env_device.attach_to_node(node)

        calls = [
            mock.call(metrics.MetaInfoScope.node, "rally0", "os_name",
                      "Linux"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "os_version",
                      "4.2.0-18-generic"),
            mock.call(metrics.MetaInfoScope.node, "rally0",
                      "cpu_logical_cores", 8),
            mock.call(metrics.MetaInfoScope.node, "rally0",
                      "cpu_physical_cores", 4),
            mock.call(metrics.MetaInfoScope.node, "rally0", "cpu_model",
                      "Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "node_name",
                      "rally0"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "host_name", "io"),
        ]

        metrics_store_add_meta_info.assert_has_calls(calls)
示例#4
0
    def start(self, car, binary, data_paths):
        self.binary_path = binary

        hosts = self.cfg.opts("client", "hosts")
        client_options = self.cfg.opts("client", "options")
        es = self.client_factory(hosts, client_options).create()

        # Cannot enable custom telemetry devices here
        t = telemetry.Telemetry(devices=[
            # Be aware that some the meta-data are taken from the host system, not the container (e.g. number of CPU cores) so if the
            # Docker container constrains these, the metrics are actually wrong.
            telemetry.EnvironmentInfo(es, self.metrics_store),
            telemetry.NodeStats(es, self.metrics_store),
            telemetry.IndexStats(es, self.metrics_store),
            telemetry.DiskIo(self.metrics_store),
            telemetry.CpuUsage(self.metrics_store)
        ])

        c = cluster.Cluster(hosts, [], t)
        self._start_process(cmd="docker-compose -f %s up" % self.binary_path, node_name="rally0")
        logger.info("Docker container has successfully started. Checking if REST API is available.")
        if wait_for_rest_layer(es):
            logger.info("REST API is available. Attaching telemetry devices to cluster.")
            t.attach_to_cluster(c)
            logger.info("Telemetry devices are now attached to the cluster.")
        else:
            logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
            self.stop(c)
            raise exceptions.LaunchError("Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
        return c
示例#5
0
    def _start_node(self, node, car, es, binary_path):
        node_name = self._node_name(node)
        host_name = socket.gethostname()

        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")

        node_telemetry = [
            telemetry.FlightRecorder(self.node_telemetry_dir),
            telemetry.JitCompiler(self.node_telemetry_dir),
            telemetry.Gc(self.node_telemetry_dir),
            telemetry.PerfStat(self.node_telemetry_dir),
            telemetry.DiskIo(self.metrics_store),
            telemetry.CpuUsage(self.metrics_store),
            telemetry.EnvironmentInfo(es, self.metrics_store),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)

        env = self._prepare_env(car, node_name, t)
        cmd = self.prepare_cmd(car, node_name)
        process = self._start_process(cmd, env, node_name, binary_path)
        node = cluster.Node(process, host_name, node_name, t)
        logger.info("Cluster node [%s] has successfully started. Attaching telemetry devices to node." % node_name)
        t.attach_to_node(node)
        logger.info("Telemetry devices are now attached to node [%s]." % node_name)

        return node
示例#6
0
    def start(self, car, binary, data_paths):
        hosts = self.cfg.opts("client", "hosts")
        client_options = self.cfg.opts("client", "options")
        es = client.EsClientFactory(hosts, client_options).create()

        # we're very specific which nodes we kill as there is potentially also an Elasticsearch based metrics store running on this machine
        node_prefix = self.cfg.opts("provisioning", "node.name.prefix")
        process.kill_running_es_instances(node_prefix)

        logger.info("Starting a cluster based on car [%s] with [%d] nodes." % (car, car.nodes))

        # TODO dm: Get rid of these...
        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")

        cluster_telemetry = [
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.MergeParts(self.metrics_store, self.node_log_dir),
            telemetry.EnvironmentInfo(es, self.metrics_store),
            telemetry.NodeStats(es, self.metrics_store),
            telemetry.IndexStats(es, self.metrics_store),
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.IndexSize(data_paths, self.metrics_store)
        ]
        t = telemetry.Telemetry(enabled_devices, devices=cluster_telemetry)
        c = cluster.Cluster(hosts, [self._start_node(node, car, es, binary) for node in range(car.nodes)], t)
        logger.info("All cluster nodes have successfully started. Checking if REST API is available.")
        if wait_for_rest_layer(es):
            logger.info("REST API is available. Attaching telemetry devices to cluster.")
            t.attach_to_cluster(c)
            logger.info("Telemetry devices are now attached to the cluster.")
        else:
            logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
            self.stop(c)
            raise exceptions.LaunchError("Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
        return c
示例#7
0
 def _start_node(self, host, node, es):
     node_name = self._node_name(node)
     p = self._start_process(cmd="docker-compose -f %s up" % self.binary_path, node_name=node_name)
     # only support a subset of telemetry for Docker hosts (specifically, we do not allow users to enable any devices)
     node_telemetry = [
         telemetry.DiskIo(self.metrics_store),
         telemetry.CpuUsage(self.metrics_store),
         telemetry.EnvironmentInfo(es, self.metrics_store)
     ]
     t = telemetry.Telemetry(devices=node_telemetry)
     return cluster.Node(p, host["host"], node_name, t)
示例#8
0
    def _start_node(self, node, car, es):
        node_name = self._node_name(node)
        host_name = socket.gethostname()

        node_telemetry = [
            telemetry.FlightRecorder(self.cfg, self.metrics_store),
            telemetry.JitCompiler(self.cfg, self.metrics_store),
            telemetry.Gc(self.cfg, self.metrics_store),
            telemetry.PerfStat(self.cfg, self.metrics_store),
            telemetry.DiskIo(self.cfg, self.metrics_store),
            telemetry.CpuUsage(self.cfg, self.metrics_store),
            telemetry.EnvironmentInfo(self.cfg, es, self.metrics_store),
        ]

        t = telemetry.Telemetry(self.cfg, devices=node_telemetry)

        env = self._prepare_env(car, node_name, t)
        cmd = self.prepare_cmd(car, node_name)
        process = self._start_process(cmd, env, node_name)
        node = cluster.Node(process, host_name, node_name, t)
        t.attach_to_node(node)

        return node
示例#9
0
    def start(self, car):
        # hardcoded for the moment, should actually be identical to internal launcher
        # Only needed on Mac:
        # hosts = [{"host": process.run_subprocess_with_output("docker-machine ip default")[0].strip(), "port": 9200}]
        hosts = [{"host": "localhost", "port": 9200}]
        client_options = self.cfg.opts("launcher", "client.options")
        # unified client config
        self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
        self.cfg.add(config.Scope.benchmark, "client", "options",
                     client_options)

        es = self.client_factory(hosts, client_options).create()

        t = telemetry.Telemetry(
            self.cfg,
            devices=[
                # Be aware that some the meta-data are taken from the host system, not the container (e.g. number of CPU cores) so if the
                # Docker container constrains these, the metrics are actually wrong.
                telemetry.EnvironmentInfo(self.cfg, es, self.metrics_store),
                telemetry.NodeStats(self.cfg, es, self.metrics_store),
                telemetry.IndexStats(self.cfg, es, self.metrics_store),
                telemetry.DiskIo(self.cfg, self.metrics_store),
                telemetry.CpuUsage(self.cfg, self.metrics_store)
            ])

        distribution_version = self.cfg.opts("source",
                                             "distribution.version",
                                             mandatory=False)

        install_dir = self._install_dir()
        io.ensure_dir(install_dir)

        java_opts = ""
        if car.heap:
            java_opts += "-Xms%s -Xmx%s " % (car.heap, car.heap)
        if car.java_opts:
            java_opts += car.java_opts

        vars = {
            "es_java_opts":
            java_opts,
            "container_memory_gb":
            "%dg" % (convert.bytes_to_gb(psutil.virtual_memory().total) // 2),
            "es_data_dir":
            "%s/data" % install_dir,
            "es_version":
            distribution_version
        }

        docker_cfg = self._render_template_from_file(vars)
        logger.info("Starting Docker container with configuration:\n%s" %
                    docker_cfg)
        docker_cfg_path = self._docker_cfg_path()
        with open(docker_cfg_path, "wt") as f:
            f.write(docker_cfg)

        c = cluster.Cluster([], t)

        self._start_process(cmd="docker-compose -f %s up" % docker_cfg_path,
                            node_name="rally0")
        # Wait for a little while: Plugins may still be initializing although the node has already started.
        time.sleep(10)

        t.attach_to_cluster(c)
        logger.info("Successfully started Docker container")
        return c