示例#1
0
    def _read_output(self, node_name, server, startup_event):
        """
        Reads the output from the ES (node) subprocess.
        """
        while True:
            l = server.stdout.readline().decode("utf-8")
            if len(l) == 0:
                # no more output -> the process has terminated. We can give up now
                startup_event.set()
                break
            l = l.rstrip()
            # don't log each output line as it is contained in the node's log files anyway and we just risk spamming our own log.
            if not startup_event.isSet():
                logger.info("%s: %s" %
                            (node_name,
                             l.replace("\n", "\n%s (stdout): " % node_name)))

            if l.find("Initialization Failed") != -1 or l.find(
                    "A fatal exception has occurred") != -1:
                logger.error("[%s] encountered initialization errors." %
                             node_name)
                # wait a moment to ensure the process has terminated before we signal that we detected a (failed) startup.
                wait = 5
                while not server.returncode or wait == 0:
                    time.sleep(0.1)
                    server.poll()
                    wait -= 1
                startup_event.set()
            if l.endswith("started") and not startup_event.isSet():
                startup_event.set()
                logger.info("[%s] has successfully started." % node_name)
示例#2
0
 def await_termination(self, server, timeout=5):
     # wait a moment to ensure the process has terminated
     wait = timeout
     while not server.returncode or wait == 0:
         time.sleep(0.1)
         server.poll()
         wait -= 1
示例#3
0
文件: cluster.py 项目: monk-ee/rally
 def _do_wait(self, expected_cluster_status):
     reached_cluster_status = None
     for attempt in range(10):
         try:
             result = self.client.cluster.health(
                 wait_for_status=expected_cluster_status,
                 wait_for_relocating_shards=0,
                 timeout="3s")
         except (socket.timeout, elasticsearch.exceptions.ConnectionError,
                 elasticsearch.exceptions.TransportError):
             pass
         else:
             reached_cluster_status = result["status"]
             relocating_shards = result["relocating_shards"]
             logger.info("GOT: %s" % str(result))
             logger.info("ALLOC:\n%s" % self.client.cat.allocation(v=True))
             logger.info("RECOVERY:\n%s" % self.client.cat.recovery(v=True))
             logger.info("SHARDS:\n%s" % self.client.cat.shards(v=True))
             if reached_cluster_status == expected_cluster_status and relocating_shards == 0:
                 return reached_cluster_status, relocating_shards
             else:
                 time.sleep(0.5)
     msg = "Cluster did not reach status [%s]. Last reached status: [%s]" % (
         expected_cluster_status, reached_cluster_status)
     logger.error(msg)
     raise exceptions.LaunchError(msg)
示例#4
0
    def instrument_env(self, car, candidate_id):
        io.ensure_dir(self.log_root)
        log_file = "%s/%s-%s.jfr" % (self.log_root, car.safe_name, candidate_id)

        console.println("\n***************************************************************************\n")
        console.println("[WARNING] Java flight recorder is a commercial feature of the Oracle JDK.\n")
        console.println("You are using Java flight recorder which requires that you comply with\nthe licensing terms stated in:\n")
        console.println(console.format.link("http://www.oracle.com/technetwork/java/javase/terms/license/index.html"))
        console.println("\nBy using this feature you confirm that you comply with these license terms.\n")
        console.println("Otherwise, please abort and rerun Rally without the \"jfr\" telemetry device.")
        console.println("\n***************************************************************************\n")

        time.sleep(3)

        console.info("%s: Writing flight recording to [%s]" % (self.human_name, log_file), logger=logger)
        # this is more robust in case we want to use custom settings
        # see http://stackoverflow.com/questions/34882035/how-to-record-allocations-with-jfr-on-command-line
        #
        # in that case change to: -XX:StartFlightRecording=defaultrecording=true,settings=es-memory-profiling
        if self.java_major_version < 9:
            return {"ES_JAVA_OPTS": "-XX:+UnlockDiagnosticVMOptions -XX:+UnlockCommercialFeatures -XX:+DebugNonSafepoints "
                                    "-XX:+FlightRecorder "
                                    "-XX:FlightRecorderOptions=disk=true,maxage=0s,maxsize=0,dumponexit=true,dumponexitpath=%s "
                                    "-XX:StartFlightRecording=defaultrecording=true" % log_file}
        else:
            return {"ES_JAVA_OPTS": "-XX:+UnlockDiagnosticVMOptions -XX:+UnlockCommercialFeatures -XX:+DebugNonSafepoints "
                                    "-XX:StartFlightRecording=maxsize=0,maxage=0s,disk=true,dumponexit=true,filename=%s" % log_file}
示例#5
0
    def on_start_engine(self, msg, sender):
        logger.info("Received signal from race control to start engine.")
        self.race_control = sender
        self.cfg = msg.cfg
        cls = metrics.metrics_store_class(self.cfg)
        self.metrics_store = cls(self.cfg)
        self.metrics_store.open(ctx=msg.open_metrics_context)

        # In our startup procedure we first create all mechanics. Only if this succeeds we'll continue.
        mechanics_and_start_message = []
        hosts = self.cfg.opts("client", "hosts")
        if len(hosts) == 0:
            raise exceptions.LaunchError("No target hosts are configured.")

        if msg.external:
            logger.info("Cluster will not be provisioned by Rally.")
            # just create one actor for this special case and run it on the coordinator node (i.e. here)
            m = self.createActor(NodeMechanicActor,
                                 #globalName="/rally/mechanic/worker/external",
                                 targetActorRequirements={"coordinator": True})
            self.children.append(m)
            mechanics_and_start_message.append((m, msg.for_nodes(ip=hosts)))
        else:
            logger.info("Cluster consisting of %s will be provisioned by Rally." % hosts)
            all_ips_and_ports = to_ip_port(hosts)
            all_node_ips = extract_all_node_ips(all_ips_and_ports)
            for ip_port, nodes in nodes_by_host(all_ips_and_ports).items():
                ip, port = ip_port
                if ip == "127.0.0.1":
                    m = self.createActor(NodeMechanicActor,
                                         #globalName="/rally/mechanic/worker/localhost",
                                         targetActorRequirements={"coordinator": True})
                    self.children.append(m)
                    mechanics_and_start_message.append((m, msg.for_nodes(all_node_ips, ip, port, nodes)))
                else:
                    if self.cfg.opts("system", "remote.benchmarking.supported"):
                        logger.info("Benchmarking against %s with external Rally daemon." % hosts)
                    else:
                        logger.error("User tried to benchmark against %s but no external Rally daemon has been started." % hosts)
                        raise exceptions.SystemSetupError("To benchmark remote hosts (e.g. %s) you need to start the Rally daemon "
                                                          "on each machine including this one." % ip)
                    already_running = actor.actor_system_already_running(ip=ip)
                    logger.info("Actor system on [%s] already running? [%s]" % (ip, str(already_running)))
                    if not already_running:
                        console.println("Waiting for Rally daemon on [%s] " % ip, end="", flush=True)
                    while not actor.actor_system_already_running(ip=ip):
                        console.println(".", end="", flush=True)
                        time.sleep(3)
                    if not already_running:
                        console.println(" [OK]")
                    m = self.createActor(NodeMechanicActor,
                                         #globalName="/rally/mechanic/worker/%s" % ip,
                                         targetActorRequirements={"ip": ip})
                    mechanics_and_start_message.append((m, msg.for_nodes(all_node_ips, ip, port, nodes)))
                    self.children.append(m)
        self.status = "starting"
        self.received_responses = []
        for mechanic_actor, start_message in mechanics_and_start_message:
            self.send(mechanic_actor, start_message)
示例#6
0
 def run(self):
     # noinspection PyBroadException
     try:
         while not self.stop:
             self.recorder.record()
             time.sleep(self.recorder.sample_interval)
     except BaseException as e:
         logger.exception("Could not determine {}".format(self.recorder))
示例#7
0
    def on_start_engine(self, msg, sender):
        logger.info("Received signal from race control to start engine.")
        self.race_control = sender
        self.cfg = msg.cfg
        self.metrics_store = metrics.InMemoryMetricsStore(self.cfg)
        self.metrics_store.open(ctx=msg.open_metrics_context)

        # In our startup procedure we first create all mechanics. Only if this succeeds we'll continue.
        mechanics_and_start_message = []
        hosts = self.cfg.opts("client", "hosts")
        if len(hosts) == 0:
            raise exceptions.LaunchError("No target hosts are configured.")

        if msg.external:
            logger.info("Cluster will not be provisioned by Rally.")
            # just create one actor for this special case and run it on the coordinator node (i.e. here)
            m = self.createActor(NodeMechanicActor,
                                 #globalName="/rally/mechanic/worker/external",
                                 targetActorRequirements={"coordinator": True})
            self.children.append(m)
            mechanics_and_start_message.append((m, msg.for_nodes(ip=hosts)))
        else:
            logger.info("Cluster consisting of %s will be provisioned by Rally." % hosts)
            all_ips_and_ports = to_ip_port(hosts)
            all_node_ips = extract_all_node_ips(all_ips_and_ports)
            for ip_port, nodes in nodes_by_host(all_ips_and_ports).items():
                ip, port = ip_port
                if ip == "127.0.0.1":
                    m = self.createActor(NodeMechanicActor,
                                         #globalName="/rally/mechanic/worker/localhost",
                                         targetActorRequirements={"coordinator": True})
                    self.children.append(m)
                    mechanics_and_start_message.append((m, msg.for_nodes(all_node_ips, ip, port, nodes)))
                else:
                    if self.cfg.opts("system", "remote.benchmarking.supported"):
                        logger.info("Benchmarking against %s with external Rally daemon." % hosts)
                    else:
                        logger.error("User tried to benchmark against %s but no external Rally daemon has been started." % hosts)
                        raise exceptions.SystemSetupError("To benchmark remote hosts (e.g. %s) you need to start the Rally daemon "
                                                          "on each machine including this one." % ip)
                    already_running = actor.actor_system_already_running(ip=ip)
                    logger.info("Actor system on [%s] already running? [%s]" % (ip, str(already_running)))
                    if not already_running:
                        console.println("Waiting for Rally daemon on [%s] " % ip, end="", flush=True)
                    while not actor.actor_system_already_running(ip=ip):
                        console.println(".", end="", flush=True)
                        time.sleep(3)
                    if not already_running:
                        console.println(" [OK]")
                    m = self.createActor(NodeMechanicActor,
                                         #globalName="/rally/mechanic/worker/%s" % ip,
                                         targetActorRequirements={"ip": ip})
                    mechanics_and_start_message.append((m, msg.for_nodes(all_node_ips, ip, port, nodes)))
                    self.children.append(m)
        self.status = "starting"
        self.received_responses = []
        for mechanic_actor, start_message in mechanics_and_start_message:
            self.send(mechanic_actor, start_message)
示例#8
0
文件: launcher.py 项目: elastic/rally
    def start(self, car):
        # hardcoded for the moment, should actually be identical to internal launcher
        # Only needed on Mac:
        # hosts = [{"host": process.run_subprocess_with_output("docker-machine ip default")[0].strip(), "port": 9200}]
        hosts = [{"host": "localhost", "port": 9200}]
        client_options = self.cfg.opts("launcher", "client.options")
        # unified client config
        self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
        self.cfg.add(config.Scope.benchmark, "client", "options", client_options)

        es = self.client_factory(hosts, client_options).create()

        t = telemetry.Telemetry(self.cfg, devices=[
            # Be aware that some the meta-data are taken from the host system, not the container (e.g. number of CPU cores) so if the
            # Docker container constrains these, the metrics are actually wrong.
            telemetry.EnvironmentInfo(self.cfg, es, self.metrics_store),
            telemetry.NodeStats(self.cfg, es, self.metrics_store),
            telemetry.IndexStats(self.cfg, es, self.metrics_store),
            telemetry.DiskIo(self.cfg, self.metrics_store),
            telemetry.CpuUsage(self.cfg, self.metrics_store)
        ])

        distribution_version = self.cfg.opts("source", "distribution.version", mandatory=False)

        install_dir = self._install_dir()
        io.ensure_dir(install_dir)

        java_opts = ""
        if car.heap:
            java_opts += "-Xms%s -Xmx%s " % (car.heap, car.heap)
        if car.java_opts:
            java_opts += car.java_opts

        vars = {
            "es_java_opts": java_opts,
            "container_memory_gb": "%dg" % (convert.bytes_to_gb(psutil.virtual_memory().total) // 2),
            "es_data_dir": "%s/data" % install_dir,
            "es_version": distribution_version
        }

        docker_cfg = self._render_template_from_file(vars)
        logger.info("Starting Docker container with configuration:\n%s" % docker_cfg)
        docker_cfg_path = self._docker_cfg_path()
        with open(docker_cfg_path, "wt") as f:
            f.write(docker_cfg)

        c = cluster.Cluster([], t)

        self._start_process(cmd="docker-compose -f %s up" % docker_cfg_path, node_name="rally0")
        # Wait for a little while: Plugins may still be initializing although the node has already started.
        time.sleep(10)

        t.attach_to_cluster(c)
        logger.info("Successfully started Docker container")
        return c
示例#9
0
def wait_for_pidfile(pidfilename, timeout=60):
    endtime = _time() + timeout
    while _time() < endtime:
        try:
            with open(pidfilename, "rb") as f:
                return int(f.read())
        except FileNotFoundError:
            time.sleep(0.5)

    msg = "pid file not available after {} seconds!".format(timeout)
    logging.error(msg)
    raise exceptions.LaunchError(msg)
示例#10
0
 def _wait_for_healthy_running_container(self, container_id, timeout):
     cmd = 'docker ps -a --filter "id={}" --filter "status=running" --filter "health=healthy" -q'.format(container_id)
     stop_watch = self.clock.stop_watch()
     stop_watch.start()
     while stop_watch.split_time() < timeout:
         containers = process.run_subprocess_with_output(cmd)
         if len(containers) > 0:
             return
         time.sleep(0.5)
     msg = "No healthy running container after {} seconds!".format(timeout)
     logging.error(msg)
     raise exceptions.LaunchError(msg)
示例#11
0
def _wait_for_healthy_running_container(container_id, timeout=60):
    cmd = 'docker ps -a --filter "id={}" --filter "status=running" --filter "health=healthy" -q'.format(container_id)
    endtime = _time() + timeout
    while _time() < endtime:
        output = subprocess.check_output(shlex.split(cmd))
        containers = output.decode("utf-8").rstrip()
        if len(containers) > 0:
            return
        time.sleep(0.5)
    msg = "No healthy running container after {} seconds!".format(timeout)
    logging.error(msg)
    raise exceptions.LaunchError(msg)
示例#12
0
def wait_for_pidfile(pidfilename, timeout=60, clock=time.Clock):
    stop_watch = clock.stop_watch()
    stop_watch.start()
    while stop_watch.split_time() < timeout:
        try:
            with open(pidfilename, "rb") as f:
                return int(f.read())
        except FileNotFoundError:
            time.sleep(0.5)

    msg = "pid file not available after {} seconds!".format(timeout)
    logging.error(msg)
    raise exceptions.LaunchError(msg)
示例#13
0
def wait_for_rest_layer(es, max_attempts=20):
    for attempt in range(max_attempts):
        import elasticsearch
        try:
            es.info()
            return True
        except elasticsearch.TransportError as e:
            if e.status_code == 503 or isinstance(e, elasticsearch.ConnectionError):
                time.sleep(1)
            elif e.status_code == 401:
                time.sleep(1)
            else:
                raise e
    return False
示例#14
0
def wait_for_rest_layer(es, max_attempts=10):
    for attempt in range(max_attempts):
        import elasticsearch
        try:
            es.info()
            return True
        except elasticsearch.TransportError as e:
            if e.status_code == 503 or isinstance(e, elasticsearch.ConnectionError):
                logger.debug("Elasticsearch REST API is not available yet (probably cluster block).")
                time.sleep(2)
            elif e.status_code == 401:
                logger.debug("Could not authenticate yet (probably x-pack initializing).")
                time.sleep(2)
            else:
                raise e
    return False
示例#15
0
def wait_for_rest_layer(es, max_attempts=10):
    for attempt in range(max_attempts):
        import elasticsearch
        try:
            es.info()
            return True
        except elasticsearch.TransportError as e:
            if e.status_code == 503 or isinstance(e, elasticsearch.ConnectionError):
                logger.debug("Elasticsearch REST API is not available yet (probably cluster block).")
                time.sleep(2)
            elif e.status_code == 401:
                logger.debug("Could not authenticate yet (probably x-pack initializing).")
                time.sleep(2)
            else:
                raise e
    return False
示例#16
0
 def _do_wait(self, expected_cluster_status):
     reached_cluster_status = None
     for attempt in range(10):
         try:
             result = self.client.cluster.health(wait_for_status=expected_cluster_status, wait_for_relocating_shards=0, timeout="3s")
         except (socket.timeout, elasticsearch.exceptions.ConnectionError, elasticsearch.exceptions.TransportError):
             pass
         else:
             reached_cluster_status = result["status"]
             relocating_shards = result["relocating_shards"]
             logger.info("GOT: %s" % str(result))
             logger.info("ALLOC:\n%s" % self.client.cat.allocation(v=True))
             logger.info("RECOVERY:\n%s" % self.client.cat.recovery(v=True))
             logger.info("SHARDS:\n%s" % self.client.cat.shards(v=True))
             if reached_cluster_status == expected_cluster_status and relocating_shards == 0:
                 return reached_cluster_status, relocating_shards
             else:
                 time.sleep(0.5)
     msg = "Cluster did not reach status [%s]. Last reached status: [%s]" % (expected_cluster_status, reached_cluster_status)
     logger.error(msg)
     raise exceptions.LaunchError(msg)
示例#17
0
    def instrument_env(self, car, candidate_id):
        io.ensure_dir(self.log_root)
        log_file = "%s/%s-%s.jfr" % (self.log_root, car.safe_name,
                                     candidate_id)

        console.println(
            "\n***************************************************************************\n"
        )
        console.println(
            "[WARNING] Java flight recorder is a commercial feature of the Oracle JDK.\n"
        )
        console.println(
            "You are using Java flight recorder which requires that you comply with\nthe licensing terms stated in:\n"
        )
        console.println(
            console.format.link(
                "http://www.oracle.com/technetwork/java/javase/terms/license/index.html"
            ))
        console.println(
            "\nBy using this feature you confirm that you comply with these license terms.\n"
        )
        console.println(
            "Otherwise, please abort and rerun Rally without the \"jfr\" telemetry device."
        )
        console.println(
            "\n***************************************************************************\n"
        )

        time.sleep(3)

        console.info("%s: Writing flight recording to [%s]" %
                     (self.human_name, log_file),
                     logger=logger)

        java_opts = self.java_opts(log_file)

        logger.info("jfr: Adding JVM arguments: [%s].", java_opts)
        return {"ES_JAVA_OPTS": java_opts}
示例#18
0
    def record(self):
        current_sample = self.sample()
        for node_stats in current_sample:
            node_name = node_stats["name"]
            if self.include_indices:
                self.record_indices_stats(node_name,
                                          node_stats,
                                          include=[
                                              "docs", "store", "indexing",
                                              "search", "merges",
                                              "query_cache", "fielddata",
                                              "segments", "translog",
                                              "request_cache"
                                          ])
            if self.include_thread_pools:
                self.record_thread_pool_stats(node_name, node_stats)
            if self.include_breakers:
                self.record_circuit_breaker_stats(node_name, node_stats)
            if self.include_buffer_pools:
                self.record_jvm_buffer_pool_stats(node_name, node_stats)
            if self.include_network:
                self.record_network_stats(node_name, node_stats)

        time.sleep(self.sample_interval)
示例#19
0
def wait_for_rest_layer(es, max_attempts=20):
    for attempt in range(max_attempts):
        import elasticsearch
        try:
            es.info()
            return True
        except elasticsearch.ConnectionError as e:
            if "SSL: UNKNOWN_PROTOCOL" in str(e):
                raise exceptions.LaunchError("Could not connect to cluster via https. Is this a https endpoint?", e)
            else:
                time.sleep(1)
        except elasticsearch.TransportError as e:
            if e.status_code == 503:
                time.sleep(1)
            elif e.status_code == 401:
                time.sleep(1)
            else:
                raise e
    return False
示例#20
0
def migrate(config_file, current_version, target_version, out=print, i=input):
    prompter = Prompter(i=i, o=out, assume_defaults=False)
    logger.info("Upgrading configuration from version [%s] to [%s]." %
                (current_version, target_version))
    # Something is really fishy. We don't want to downgrade the configuration.
    if current_version >= target_version:
        raise ConfigError(
            "The existing config file is available in a later version already. Expected version <= [%s] but found [%s]"
            % (target_version, current_version))
    # but first a backup...
    config_file.backup()
    config = config_file.load(interpolation=None)

    if current_version == 0 and target_version > current_version:
        logger.info("Migrating config from version [0] to [1]")
        current_version = 1
        config["meta"] = {}
        config["meta"]["config.version"] = str(current_version)
        # in version 1 we changed some directories from being absolute to being relative
        config["system"]["log.root.dir"] = "logs"
        config["provisioning"]["local.install.dir"] = "install"
        config["reporting"]["report.base.dir"] = "reports"
    if current_version == 1 and target_version > current_version:
        logger.info("Migrating config from version [1] to [2]")
        current_version = 2
        config["meta"]["config.version"] = str(current_version)
        # no need to ask the user now if we are about to upgrade to version 4
        config["reporting"]["datastore.type"] = "in-memory"
        config["reporting"]["datastore.host"] = ""
        config["reporting"]["datastore.port"] = ""
        config["reporting"]["datastore.secure"] = ""
        config["reporting"]["datastore.user"] = ""
        config["reporting"]["datastore.password"] = ""
        config["system"]["env.name"] = "local"
    if current_version == 2 and target_version > current_version:
        logger.info("Migrating config from version [2] to [3]")
        current_version = 3
        config["meta"]["config.version"] = str(current_version)
        # Remove obsolete settings
        config["reporting"].pop("report.base.dir")
        config["reporting"].pop("output.html.report.filename")
    if current_version == 3 and target_version > current_version:
        root_dir = config["system"]["root.dir"]
        out("""
            *****************************************************************************************

            You have an old configuration of Rally. Rally has now a much simpler setup
            routine which will autodetect lots of settings for you and it also does not
            require you to setup a metrics store anymore.

            Rally will now migrate your configuration but if you don't need advanced features
            like a metrics store, then you should delete the configuration directory:

              rm -rf {0}

            and then rerun Rally's configuration routine:

              {1} configure

            Please also note you have {2:.1f} GB of data in your current benchmark directory at

              {3}

            You might want to clean up this directory also.

            For more details please see {4}

            *****************************************************************************************

            Pausing for 10 seconds to let you consider this message.
            """.format(
            config_file.config_dir, PROGRAM_NAME,
            convert.bytes_to_gb(io.get_size(root_dir)), root_dir,
            console.format.link(
                "https://github.com/elastic/rally/blob/master/CHANGELOG.md#030"
            )))
        time.sleep(10)
        logger.info("Migrating config from version [3] to [4]")
        current_version = 4
        config["meta"]["config.version"] = str(current_version)
        if len(config["reporting"]["datastore.host"]) > 0:
            config["reporting"]["datastore.type"] = "elasticsearch"
        else:
            config["reporting"]["datastore.type"] = "in-memory"
        # Remove obsolete settings
        config["build"].pop("maven.bin")
        config["benchmarks"].pop("metrics.stats.disk.device")

    if current_version == 4 and target_version > current_version:
        config["tracks"] = {}
        config["tracks"][
            "default.url"] = "https://github.com/elastic/rally-tracks"
        current_version = 5
        config["meta"]["config.version"] = str(current_version)

    if current_version == 5 and target_version > current_version:
        config["defaults"] = {}
        config["defaults"]["preserve_benchmark_candidate"] = str(False)
        current_version = 6
        config["meta"]["config.version"] = str(current_version)

    if current_version == 6 and target_version > current_version:
        # Remove obsolete settings
        config.pop("provisioning")
        config["system"].pop("log.root.dir")
        current_version = 7
        config["meta"]["config.version"] = str(current_version)

    if current_version == 7 and target_version > current_version:
        # move [system][root.dir] to [node][root.dir]
        if "node" not in config:
            config["node"] = {}
        config["node"]["root.dir"] = config["system"].pop("root.dir")
        # also move all references!
        for section in config:
            for k, v in config[section].items():
                config[section][k] = v.replace("${system:root.dir}",
                                               "${node:root.dir}")
        current_version = 8
        config["meta"]["config.version"] = str(current_version)
    if current_version == 8 and target_version > current_version:
        config["teams"] = {}
        config["teams"][
            "default.url"] = "https://github.com/elastic/rally-teams"
        current_version = 9
        config["meta"]["config.version"] = str(current_version)
    if current_version == 9 and target_version > current_version:
        config["distributions"] = {}
        config["distributions"]["release.1.url"] = "https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-" \
                                                   "{{VERSION}}.tar.gz"
        config["distributions"]["release.2.url"] = "https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/" \
                                                   "distribution/tar/elasticsearch/{{VERSION}}/elasticsearch-{{VERSION}}.tar.gz"
        config["distributions"][
            "release.url"] = "https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-{{VERSION}}.tar.gz"
        config["distributions"]["release.cache"] = "true"
        current_version = 10
        config["meta"]["config.version"] = str(current_version)
    if current_version == 10 and target_version > current_version:
        config["runtime"]["java.home"] = config["runtime"].pop("java8.home")
        current_version = 11
        config["meta"]["config.version"] = str(current_version)
    if current_version == 11 and target_version > current_version:
        # As this is a rather complex migration, we log more than usual to understand potential migration problems better.
        if "source" in config:
            if "local.src.dir" in config["source"]:
                previous_root = config["source"].pop("local.src.dir")
                logger.info("Set [source][local.src.dir] to [%s]." %
                            previous_root)
                # if this directory was Rally's default location, then move it on the file system because to allow for checkouts of plugins
                # in the sibling directory.
                if previous_root == os.path.join(config["node"]["root.dir"],
                                                 "src"):
                    new_root_dir_all_sources = previous_root
                    new_es_sub_dir = "elasticsearch"
                    new_root = os.path.join(new_root_dir_all_sources,
                                            new_es_sub_dir)
                    # only attempt to move if the directory exists. It may be possible that users never ran a source benchmark although they
                    # have configured it. In that case the source directory will not yet exist.
                    if io.exists(previous_root):
                        logger.info(
                            "Previous source directory was at Rally's default location [%s]. Moving to [%s]."
                            % (previous_root, new_root))
                        try:
                            # we need to do this in two steps as we need to move the sources to a subdirectory
                            tmp_path = io.normalize_path(
                                os.path.join(new_root_dir_all_sources,
                                             os.pardir, "tmp_src_mig"))
                            os.rename(previous_root, tmp_path)
                            io.ensure_dir(new_root)
                            os.rename(tmp_path, new_root)
                        except OSError:
                            logger.exception(
                                "Could not move source directory from [%s] to [%s]."
                                % (previous_root, new_root))
                            # A warning is sufficient as Rally should just do a fresh checkout if moving did not work.
                            console.warn(
                                "Elasticsearch source directory could not be moved from [%s] to [%s]. Please check the logs."
                                % (previous_root, new_root))
                    else:
                        logger.info(
                            "Source directory is configured at Rally's default location [%s] but does not exist yet."
                            % previous_root)
                else:
                    logger.info(
                        "Previous source directory was the custom directory [%s]."
                        % previous_root)
                    new_root_dir_all_sources = io.normalize_path(
                        os.path.join(previous_root, os.path.pardir))
                    # name of the elasticsearch project directory.
                    new_es_sub_dir = io.basename(previous_root)

                logger.info("Setting [node][src.root.dir] to [%s]." %
                            new_root_dir_all_sources)
                config["node"]["src.root.dir"] = new_root_dir_all_sources
                logger.info(
                    "Setting [source][elasticsearch.src.subdir] to [%s]" %
                    new_es_sub_dir)
                config["source"]["elasticsearch.src.subdir"] = new_es_sub_dir
            else:
                logger.info(
                    "Key [local.src.dir] not found. Advancing without changes."
                )
        else:
            logger.info(
                "No section named [source] found in config. Advancing without changes."
            )
        current_version = 12
        config["meta"]["config.version"] = str(current_version)

    if current_version == 12 and target_version > current_version:
        # the current configuration allows to benchmark from sources
        if "build" in config and "gradle.bin" in config["build"]:
            java_9_home = io.guess_java_home(major_version=9)
            from esrally.utils import jvm
            if java_9_home and not jvm.is_early_access_release(java_9_home):
                logger.debug("Autodetected a JDK 9 installation at [%s]" %
                             java_9_home)
                if "runtime" not in config:
                    config["runtime"] = {}
                config["runtime"]["java9.home"] = java_9_home
            else:
                logger.debug(
                    "Could not autodetect a JDK 9 installation. Checking [java.home] already points to a JDK 9."
                )
                detected = False
                if "runtime" in config:
                    java_home = config["runtime"]["java.home"]
                    if jvm.major_version(
                            java_home
                    ) == 9 and not jvm.is_early_access_release(java_home):
                        config["runtime"]["java9.home"] = java_home
                        detected = True

                if not detected:
                    logger.debug(
                        "Could not autodetect a JDK 9 installation. Asking user."
                    )
                    raw_java_9_home = prompter.ask_property(
                        "Enter the JDK 9 root directory",
                        check_path_exists=True,
                        mandatory=False)
                    if raw_java_9_home and jvm.major_version(
                            raw_java_9_home
                    ) == 9 and not jvm.is_early_access_release(
                            raw_java_9_home):
                        java_9_home = io.normalize_path(
                            raw_java_9_home) if raw_java_9_home else None
                        config["runtime"]["java9.home"] = java_9_home
                    else:
                        out("********************************************************************************"
                            )
                        out("You don't have a valid JDK 9 installation and cannot benchmark source builds."
                            )
                        out("")
                        out("You can still benchmark binary distributions with e.g.:"
                            )
                        out("")
                        out("  %s --distribution-version=6.0.0" % PROGRAM_NAME)
                        out("********************************************************************************"
                            )
                        out("")

        current_version = 13
        config["meta"]["config.version"] = str(current_version)

    # all migrations done
    config_file.store(config)
    logger.info("Successfully self-upgraded configuration to version [%s]" %
                target_version)
示例#21
0
def migrate(config_file, current_version, target_version, out=print):
    logger.info("Upgrading configuration from version [%s] to [%s]." %
                (current_version, target_version))
    # Something is really fishy. We don't want to downgrade the configuration.
    if current_version >= target_version:
        raise ConfigError(
            "The existing config file is available in a later version already. Expected version <= [%s] but found [%s]"
            % (target_version, current_version))
    # but first a backup...
    config_file.backup()
    config = config_file.load(interpolation=None)

    if current_version == 0 and target_version > current_version:
        logger.info("Migrating config from version [0] to [1]")
        current_version = 1
        config["meta"] = {}
        config["meta"]["config.version"] = str(current_version)
        # in version 1 we changed some directories from being absolute to being relative
        config["system"]["log.root.dir"] = "logs"
        config["provisioning"]["local.install.dir"] = "install"
        config["reporting"]["report.base.dir"] = "reports"
    if current_version == 1 and target_version > current_version:
        logger.info("Migrating config from version [1] to [2]")
        current_version = 2
        config["meta"]["config.version"] = str(current_version)
        # no need to ask the user now if we are about to upgrade to version 4
        config["reporting"]["datastore.type"] = "in-memory"
        config["reporting"]["datastore.host"] = ""
        config["reporting"]["datastore.port"] = ""
        config["reporting"]["datastore.secure"] = ""
        config["reporting"]["datastore.user"] = ""
        config["reporting"]["datastore.password"] = ""
        config["system"]["env.name"] = "local"
    if current_version == 2 and target_version > current_version:
        logger.info("Migrating config from version [2] to [3]")
        current_version = 3
        config["meta"]["config.version"] = str(current_version)
        # Remove obsolete settings
        config["reporting"].pop("report.base.dir")
        config["reporting"].pop("output.html.report.filename")
    if current_version == 3 and target_version > current_version:
        root_dir = config["system"]["root.dir"]
        out("*****************************************************************************************"
            )
        out("")
        out("You have an old configuration of Rally. Rally has now a much simpler setup"
            )
        out("routine which will autodetect lots of settings for you and it also does not"
            )
        out("require you to setup a metrics store anymore.")
        out("")
        out("Rally will now migrate your configuration but if you don't need advanced features"
            )
        out("like a metrics store, then you should delete the configuration directory:"
            )
        out("")
        out("  rm -rf %s" % config_file.config_dir)
        out("")
        out("and then rerun Rally's configuration routine:")
        out("")
        out("  %s configure" % PROGRAM_NAME)
        out("")
        out("Please also note you have %.1f GB of data in your current benchmark directory at"
            % convert.bytes_to_gb(io.get_size(root_dir)))
        out()
        out("  %s" % root_dir)
        out("")
        out("You might want to clean up this directory also.")
        out()
        out("For more details please see %s" % console.format.link(
            "https://github.com/elastic/rally/blob/master/CHANGELOG.md#030"))
        out("")
        out("*****************************************************************************************"
            )
        out("")
        out("Pausing for 10 seconds to let you consider this message.")
        time.sleep(10)
        logger.info("Migrating config from version [3] to [4]")
        current_version = 4
        config["meta"]["config.version"] = str(current_version)
        if len(config["reporting"]["datastore.host"]) > 0:
            config["reporting"]["datastore.type"] = "elasticsearch"
        else:
            config["reporting"]["datastore.type"] = "in-memory"
        # Remove obsolete settings
        config["build"].pop("maven.bin")
        config["benchmarks"].pop("metrics.stats.disk.device")

    if current_version == 4 and target_version > current_version:
        config["tracks"] = {}
        config["tracks"][
            "default.url"] = "https://github.com/elastic/rally-tracks"
        current_version = 5
        config["meta"]["config.version"] = str(current_version)

    if current_version == 5 and target_version > current_version:
        config["defaults"] = {}
        config["defaults"]["preserve_benchmark_candidate"] = str(False)
        current_version = 6
        config["meta"]["config.version"] = str(current_version)

    if current_version == 6 and target_version > current_version:
        # Remove obsolete settings
        config.pop("provisioning")
        config["system"].pop("log.root.dir")
        current_version = 7
        config["meta"]["config.version"] = str(current_version)

    if current_version == 7 and target_version > current_version:
        # move [system][root.dir] to [node][root.dir]
        if "node" not in config:
            config["node"] = {}
        config["node"]["root.dir"] = config["system"].pop("root.dir")
        # also move all references!
        for section in config:
            for k, v in config[section].items():
                config[section][k] = v.replace("${system:root.dir}",
                                               "${node:root.dir}")
        current_version = 8
        config["meta"]["config.version"] = str(current_version)
    if current_version == 8 and target_version > current_version:
        config["teams"] = {}
        config["teams"][
            "default.url"] = "https://github.com/elastic/rally-teams"
        current_version = 9
        config["meta"]["config.version"] = str(current_version)
    if current_version == 9 and target_version > current_version:
        config["distributions"] = {}
        config["distributions"]["release.1.url"] = "https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-" \
                                                   "{{VERSION}}.tar.gz"
        config["distributions"]["release.2.url"] = "https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/" \
                                                   "distribution/tar/elasticsearch/{{VERSION}}/elasticsearch-{{VERSION}}.tar.gz"
        config["distributions"][
            "release.url"] = "https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-{{VERSION}}.tar.gz"
        config["distributions"]["release.cache"] = "true"
        current_version = 10
        config["meta"]["config.version"] = str(current_version)
    if current_version == 10 and target_version > current_version:
        config["runtime"]["java.home"] = config["runtime"].pop("java8.home")
        current_version = 11
        config["meta"]["config.version"] = str(current_version)

    # all migrations done
    config_file.store(config)
    logger.info("Successfully self-upgraded configuration to version [%s]" %
                target_version)
示例#22
0
文件: config.py 项目: elastic/rally
def migrate(config_file, current_version, target_version, out=print):
    logger.info("Upgrading configuration from version [%s] to [%s]." % (current_version, target_version))
    # Something is really fishy. We don't want to downgrade the configuration.
    if current_version >= target_version:
        raise ConfigError("The existing config file is available in a later version already. Expected version <= [%s] but found [%s]"
                          % (target_version, current_version))
    # but first a backup...
    config_file.backup()
    config = config_file.load(interpolation=None)

    if current_version == 0 and target_version > current_version:
        logger.info("Migrating config from version [0] to [1]")
        current_version = 1
        config["meta"] = {}
        config["meta"]["config.version"] = str(current_version)
        # in version 1 we changed some directories from being absolute to being relative
        config["system"]["log.root.dir"] = "logs"
        config["provisioning"]["local.install.dir"] = "install"
        config["reporting"]["report.base.dir"] = "reports"
    if current_version == 1 and target_version > current_version:
        logger.info("Migrating config from version [1] to [2]")
        current_version = 2
        config["meta"]["config.version"] = str(current_version)
        # no need to ask the user now if we are about to upgrade to version 4
        config["reporting"]["datastore.type"] = "in-memory"
        config["reporting"]["datastore.host"] = ""
        config["reporting"]["datastore.port"] = ""
        config["reporting"]["datastore.secure"] = ""
        config["reporting"]["datastore.user"] = ""
        config["reporting"]["datastore.password"] = ""
        config["system"]["env.name"] = "local"
    if current_version == 2 and target_version > current_version:
        logger.info("Migrating config from version [2] to [3]")
        current_version = 3
        config["meta"]["config.version"] = str(current_version)
        # Remove obsolete settings
        config["reporting"].pop("report.base.dir")
        config["reporting"].pop("output.html.report.filename")
    if current_version == 3 and target_version > current_version:
        root_dir = config["system"]["root.dir"]
        out("*****************************************************************************************")
        out("")
        out("You have an old configuration of Rally. Rally has now a much simpler setup")
        out("routine which will autodetect lots of settings for you and it also does not")
        out("require you to setup a metrics store anymore.")
        out("")
        out("Rally will now migrate your configuration but if you don't need advanced features")
        out("like a metrics store, then you should delete the configuration directory:")
        out("")
        out("  rm -rf %s" % config_file.config_dir)
        out("")
        out("and then rerun Rally's configuration routine:")
        out("")
        out("  %s configure" % PROGRAM_NAME)
        out("")
        out("Please also note you have %.1f GB of data in your current benchmark directory at"
            % convert.bytes_to_gb(io.get_size(root_dir)))
        out()
        out("  %s" % root_dir)
        out("")
        out("You might want to clean up this directory also.")
        out()
        out("For more details please see %s" % console.format.link("https://github.com/elastic/rally/blob/master/CHANGELOG.md#030"))
        out("")
        out("*****************************************************************************************")
        out("")
        out("Pausing for 10 seconds to let you consider this message.")
        time.sleep(10)
        logger.info("Migrating config from version [3] to [4]")
        current_version = 4
        config["meta"]["config.version"] = str(current_version)
        if len(config["reporting"]["datastore.host"]) > 0:
            config["reporting"]["datastore.type"] = "elasticsearch"
        else:
            config["reporting"]["datastore.type"] = "in-memory"
        # Remove obsolete settings
        config["build"].pop("maven.bin")
        config["benchmarks"].pop("metrics.stats.disk.device")

    if current_version == 4 and target_version > current_version:
        config["tracks"] = {}
        config["tracks"]["default.url"] = "https://github.com/elastic/rally-tracks"
        current_version = 5
        config["meta"]["config.version"] = str(current_version)

    if current_version == 5 and target_version > current_version:
        config["defaults"] = {}
        config["defaults"]["preserve_benchmark_candidate"] = str(False)
        current_version = 6
        config["meta"]["config.version"] = str(current_version)

    # all migrations done
    config_file.store(config)
    logger.info("Successfully self-upgraded configuration to version [%s]" % target_version)
示例#23
0
    def start(self, car):
        # hardcoded for the moment, should actually be identical to internal launcher
        # Only needed on Mac:
        # hosts = [{"host": process.run_subprocess_with_output("docker-machine ip default")[0].strip(), "port": 9200}]
        hosts = [{"host": "localhost", "port": 9200}]
        client_options = self.cfg.opts("launcher", "client.options")
        # unified client config
        self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
        self.cfg.add(config.Scope.benchmark, "client", "options",
                     client_options)

        es = self.client_factory(hosts, client_options).create()

        t = telemetry.Telemetry(
            self.cfg,
            devices=[
                # Be aware that some the meta-data are taken from the host system, not the container (e.g. number of CPU cores) so if the
                # Docker container constrains these, the metrics are actually wrong.
                telemetry.EnvironmentInfo(self.cfg, es, self.metrics_store),
                telemetry.NodeStats(self.cfg, es, self.metrics_store),
                telemetry.IndexStats(self.cfg, es, self.metrics_store),
                telemetry.DiskIo(self.cfg, self.metrics_store),
                telemetry.CpuUsage(self.cfg, self.metrics_store)
            ])

        distribution_version = self.cfg.opts("source",
                                             "distribution.version",
                                             mandatory=False)

        install_dir = self._install_dir()
        io.ensure_dir(install_dir)

        java_opts = ""
        if car.heap:
            java_opts += "-Xms%s -Xmx%s " % (car.heap, car.heap)
        if car.java_opts:
            java_opts += car.java_opts

        vars = {
            "es_java_opts":
            java_opts,
            "container_memory_gb":
            "%dg" % (convert.bytes_to_gb(psutil.virtual_memory().total) // 2),
            "es_data_dir":
            "%s/data" % install_dir,
            "es_version":
            distribution_version
        }

        docker_cfg = self._render_template_from_file(vars)
        logger.info("Starting Docker container with configuration:\n%s" %
                    docker_cfg)
        docker_cfg_path = self._docker_cfg_path()
        with open(docker_cfg_path, "wt") as f:
            f.write(docker_cfg)

        c = cluster.Cluster([], t)

        self._start_process(cmd="docker-compose -f %s up" % docker_cfg_path,
                            node_name="rally0")
        # Wait for a little while: Plugins may still be initializing although the node has already started.
        time.sleep(10)

        t.attach_to_cluster(c)
        logger.info("Successfully started Docker container")
        return c
示例#24
0
文件: mechanic.py 项目: levylll/rally
    def receiveMessage(self, msg, sender):
        try:
            logger.debug(
                "MechanicActor#receiveMessage(msg = [%s] sender = [%s])" %
                (str(type(msg)), str(sender)))
            if isinstance(msg, StartEngine):
                logger.info(
                    "Received signal from race control to start engine.")
                self.race_control = sender
                # In our startup procedure we first create all mechanics. Only if this succeeds
                mechanics_and_start_message = []

                if msg.external:
                    logger.info(
                        "Target node(s) will not be provisioned by Rally.")
                    # just create one actor for this special case and run it on the coordinator node (i.e. here)
                    m = self.createActor(
                        LocalNodeMechanicActor,
                        globalName="/rally/mechanic/worker/external",
                        targetActorRequirements={"coordinator": True})
                    self.mechanics.append(m)
                    # we can use the original message in this case
                    mechanics_and_start_message.append((m, msg))
                else:
                    hosts = msg.cfg.opts("client", "hosts")
                    logger.info(
                        "Target node(s) %s will be provisioned by Rally." %
                        hosts)
                    if len(hosts) == 0:
                        raise exceptions.LaunchError(
                            "No target hosts are configured.")
                    for host in hosts:
                        ip = host["host"]
                        port = int(host["port"])
                        # user may specify "localhost" on the command line but the problem is that we auto-register the actor system
                        # with "ip": "127.0.0.1" so we convert this special case automatically. In all other cases the user needs to
                        # start the actor system on the other host and is aware that the parameter for the actor system and the
                        # --target-hosts parameter need to match.
                        if ip == "localhost" or ip == "127.0.0.1":
                            m = self.createActor(
                                LocalNodeMechanicActor,
                                globalName="/rally/mechanic/worker/localhost",
                                targetActorRequirements={"coordinator": True})
                            self.mechanics.append(m)
                            mechanics_and_start_message.append(
                                (m, msg.with_port(port)))
                        else:
                            if msg.cfg.opts("system",
                                            "remote.benchmarking.supported"):
                                logger.info(
                                    "Benchmarking against %s with external Rally daemon."
                                    % hosts)
                            else:
                                logger.error(
                                    "User tried to benchmark against %s but no external Rally daemon has been started."
                                    % hosts)
                                raise exceptions.SystemSetupError(
                                    "To benchmark remote hosts (e.g. %s) you need to start the Rally daemon "
                                    "on each machine including this one." % ip)
                            already_running = actor.actor_system_already_running(
                                ip=ip)
                            logger.info(
                                "Actor system on [%s] already running? [%s]" %
                                (ip, str(already_running)))
                            if not already_running:
                                console.println(
                                    "Waiting for Rally daemon on [%s] " % ip,
                                    end="",
                                    flush=True)
                            while not actor.actor_system_already_running(
                                    ip=ip):
                                console.println(".", end="", flush=True)
                                time.sleep(3)
                            if not already_running:
                                console.println(" [OK]")
                            m = self.createActor(
                                RemoteNodeMechanicActor,
                                globalName="/rally/mechanic/worker/%s" % ip,
                                targetActorRequirements={"ip": ip})
                            mechanics_and_start_message.append(
                                (m, msg.with_port(port)))
                            self.mechanics.append(m)
                for mechanic_actor, start_message in mechanics_and_start_message:
                    self.send(mechanic_actor, start_message)
            elif isinstance(msg, EngineStarted):
                self.send(self.race_control, msg)
            elif isinstance(msg, OnBenchmarkStart):
                for m in self.mechanics:
                    self.send(m, msg)
            elif isinstance(msg, Success):
                self.send(self.race_control, msg)
            elif isinstance(msg, Failure):
                self.send(self.race_control, msg)
            elif isinstance(msg, OnBenchmarkStop):
                for m in self.mechanics:
                    self.send(m, msg)
            elif isinstance(msg, BenchmarkStopped):
                # TODO dm: Actually we need to wait for all BenchmarkStopped messages from all our mechanic actors
                # TODO dm: We will actually duplicate cluster level metrics if each of our mechanic actors gathers these...
                self.send(self.race_control, msg)
            elif isinstance(msg, StopEngine):
                for m in self.mechanics:
                    self.send(m, msg)
            elif isinstance(msg, EngineStopped):
                self.send(self.race_control, msg)
                # clear all state as the mechanic might get reused later
                for m in self.mechanics:
                    self.send(m, thespian.actors.ActorExitRequest())
                self.mechanics = []
                # self terminate + slave nodes
                self.send(self.myAddress, thespian.actors.ActorExitRequest())
            elif isinstance(msg, thespian.actors.ChildActorExited):
                # TODO dm: Depending on our state model this can be fine (e.g. when it exited due to our ActorExitRequest message
                # or it could be problematic and mean that an exception has occured.
                pass
            elif isinstance(msg, thespian.actors.PoisonMessage):
                # something went wrong with a child actor
                if isinstance(msg.poisonMessage, StartEngine):
                    raise exceptions.LaunchError(
                        "Could not start benchmark candidate. Are Rally daemons on all targeted machines running?"
                    )
                else:
                    logger.error(
                        "[%s] sent to a child actor has resulted in PoisonMessage"
                        % str(msg.poisonMessage))
                    raise exceptions.RallyError(
                        "Could not communicate with benchmark candidate (unknown reason)"
                    )
        except BaseException:
            logger.exception("Cannot process message [%s]" % msg)
            # usually, we'll notify the sender but in case a child sent something that caused an exception we'd rather
            # have it bubble up to race control. Otherwise, we could play ping-pong with our child actor.
            recipient = self.race_control if sender in self.mechanics else sender
            ex_type, ex_value, ex_traceback = sys.exc_info()
            # avoid "can't pickle traceback objects"
            import traceback
            self.send(
                recipient,
                Failure("Could not execute command (%s)" % ex_value,
                        traceback.format_exc()))