def configure_logging(cfg):
    """
    Replaces the root logger's handlers with a single handler that logs either to the log file or to stdout.

    The log directory is always created and stored in the config as ``system`` / ``log.dir``. If
    ``system`` / ``logging.output`` is ``"file"``, an already existing log file is renamed to a backup first.

    :param cfg: The config object. ``system`` / ``logging.output`` is read, ``system`` / ``log.dir`` is added.
    """
    # Even if we don't log to a file, other parts of the application rely on this path to exist -> enforce
    log_file = log_file_path(cfg)
    log_dir = os.path.dirname(log_file)
    io.ensure_dir(log_dir)
    cfg.add(config.Scope.application, "system", "log.dir", log_dir)

    logging_output = cfg.opts("system", "logging.output")
    if logging_output == "file":
        console.info("Writing logs to %s" % log_file)
        # there is an old log file lying around -> backup
        if os.path.exists(log_file):
            os.rename(log_file, "%s-bak-%d.log" % (log_file, int(os.path.getctime(log_file))))
        ch = logging.FileHandler(filename=log_file, mode="a")
    else:
        ch = logging.StreamHandler(stream=sys.stdout)

    log_level = logging.INFO
    ch.setLevel(log_level)
    # Zero-pad the milliseconds to three digits; with a plain "%d", 5 ms is rendered as ",5" which is easily
    # misread as half a second.
    formatter = logging.Formatter("%(asctime)s,%(msecs)03d %(name)s %(levelname)s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
    # timestamps are emitted in UTC
    formatter.converter = time.gmtime
    ch.setFormatter(formatter)

    # Remove all handlers associated with the root logger object so we can start over with an entirely fresh log configuration
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)

    logging.root.addHandler(ch)
    logging.getLogger("elasticsearch").setLevel(logging.WARN)
def print_race_info(cfg):
    """
    Prints the track, challenge and car of the current race to the console.

    :param cfg: The config object; ``track``, ``challenge`` and ``car`` are read from the ``benchmarks`` section.
    """
    race_details = tuple(cfg.opts("benchmarks", key) for key in ("track", "challenge", "car"))
    console.info("Racing on track [%s], challenge [%s] and car [%s]" % race_details)
    # an empty line separates this message visually from subsequent output
    console.println("")
def generate(cfg):
    """
    Generates charts and one dashboard per race configuration and emits all of them as a JSON array.

    The JSON is written to the file denoted by ``generator`` / ``output.path`` if that option is set, otherwise it
    is printed to stdout.

    :param cfg: The config object.
    """
    uses_time_series = cfg.opts("generator", "chart.type") == "time-series"
    chart_type = TimeSeriesCharts if uses_time_series else BarCharts

    console.info("Loading track data...", flush=True)
    race_configs = load_race_configs(cfg)
    env = cfg.opts("system", "env.name")

    structures = []
    console.info("Generating charts...", flush=True)
    for race_config in race_configs:
        index_ops_charts = generate_index_ops(chart_type, race_config, env)
        io_charts = generate_io(chart_type, race_config, env)
        gc_charts = generate_gc(chart_type, race_config, env)
        query_charts = generate_queries(chart_type, race_config, env)
        charts = index_ops_charts + io_charts + gc_charts + query_charts

        dashboard = generate_dashboard(env, race_config["track"], charts)

        # charts go first, followed by the dashboard that has been generated for them
        structures.extend(charts)
        structures.append(dashboard)

    output_path = cfg.opts("generator", "output.path")
    if not output_path:
        print(json.dumps(structures, indent=4))
        return

    with open(io.normalize_path(output_path), mode="wt", encoding="utf-8") as f:
        print(json.dumps(structures, indent=4), file=f)
def setup(self):
    """
    Joins the running actor system, asks the mechanic to start the engine and stores the race on success.

    :raises exceptions.RallyError: If the mechanic answers with a failure or with any other unexpected message.
    """
    # at this point an actor system has to run and we should only join
    self.actor_system = actor.bootstrap_actor_system(try_join=True)
    self.mechanic = self.actor_system.createActor(mechanic.MechanicActor,
                                                  targetActorRequirements={"coordinator": True},
                                                  globalName="/rally/mechanic/coordinator")
    logger.info("Asking mechanic to start the engine.")
    start_request = mechanic.StartEngine(self.cfg, self.metrics_store.open_context, self.sources, self.build,
                                         self.distribution, self.external, self.docker)
    result = self.actor_system.ask(self.mechanic, start_request)

    # handle all non-success answers first
    if not isinstance(result, mechanic.EngineStarted):
        if isinstance(result, mechanic.Failure):
            logger.info("Starting engine has failed. Reason [%s]." % result.message)
            raise exceptions.RallyError(result.message)
        raise exceptions.RallyError("Mechanic has not started engine but instead [%s]. Terminating race without result." % str(result))

    logger.info("Mechanic has started engine successfully.")
    self.metrics_store.meta_info = result.system_meta_info
    cluster = result.cluster_meta_info
    self.race_store.store_race(self.track, cluster.hosts, cluster.revision, cluster.distribution_version)

    challenge = self.track.find_challenge_or_default(self.cfg.opts("track", "challenge.name"))
    car_name = self.cfg.opts("mechanic", "car.name")
    console.info("Racing on track [%s], challenge [%s] and car [%s]" % (self.track, challenge, car_name))
    # an empty line separates this message visually from subsequent output
    console.println("")
def setup(self):
    """
    Joins the running actor system, asks the mechanic to start the engine and records the cluster in the race.

    If no distribution version is configured, the version reported by the cluster is added to the config and the
    track and challenge are loaded again.

    :raises exceptions.RallyError: If the mechanic answers with a failure or with any other unexpected message.
    """
    # at this point an actor system has to run and we should only join
    self.actor_system = actor.bootstrap_actor_system(try_join=True)
    self.mechanic = self.actor_system.createActor(mechanic.MechanicActor,
                                                  targetActorRequirements={"coordinator": True},
                                                  globalName="/rally/mechanic/coordinator")
    logger.info("Asking mechanic to start the engine.")
    # This can only work accurately if the user has already specified the correct version!
    cluster_settings = self.race.challenge.cluster_settings
    start_request = mechanic.StartEngine(self.cfg, self.metrics_store.open_context, cluster_settings, self.sources,
                                         self.build, self.distribution, self.external, self.docker)
    result = self.actor_system.ask(self.mechanic, start_request)

    # handle all non-success answers first
    if not isinstance(result, mechanic.EngineStarted):
        if isinstance(result, mechanic.Failure):
            logger.info("Starting engine has failed. Reason [%s]." % result.message)
            raise exceptions.RallyError(result.message)
        raise exceptions.RallyError("Mechanic has not started engine but instead [%s]. Terminating race without result." % str(result))

    logger.info("Mechanic has started engine successfully.")
    self.metrics_store.meta_info = result.system_meta_info
    cluster = result.cluster_meta_info
    self.race.cluster = cluster

    version_is_configured = self.cfg.exists("mechanic", "distribution.version")
    if not version_is_configured:
        self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", cluster.distribution_version)
        logger.info("Reloading track based for distribution version [%s]" % cluster.distribution_version)
        reloaded_track = self._load_track()
        self.race.track = reloaded_track
        self.race.challenge = self._find_challenge(reloaded_track)

    race_summary = (self.race.track_name, self.race.challenge_name, self.race.car)
    console.info("Racing on track [%s], challenge [%s] and car [%s]\n" % race_summary)
def instrument_env(self, car, candidate_id):
    """
    Prints the Java flight recorder license warning, pauses for three seconds and returns the JVM options that
    enable a flight recording.

    :param car: The car; its ``safe_name`` becomes part of the recording's file name.
    :param candidate_id: Identifier that becomes part of the recording's file name.
    :return: A dict with the single key ``ES_JAVA_OPTS``.
    """
    io.ensure_dir(self.log_root)
    log_file = "%s/%s-%s.jfr" % (self.log_root, car.safe_name, candidate_id)

    separator = "\n***************************************************************************\n"
    console.println(separator)
    console.println("[WARNING] Java flight recorder is a commercial feature of the Oracle JDK.\n")
    console.println("You are using Java flight recorder which requires that you comply with\nthe licensing terms stated in:\n")
    console.println(console.format.link("http://www.oracle.com/technetwork/java/javase/terms/license/index.html"))
    console.println("\nBy using this feature you confirm that you comply with these license terms.\n")
    console.println("Otherwise, please abort and rerun Rally without the \"jfr\" telemetry device.")
    console.println(separator)

    # pause after printing the license notice
    time.sleep(3)

    console.info("%s: Writing flight recording to [%s]" % (self.human_name, log_file), logger=logger)

    # this is more robust in case we want to use custom settings
    # see http://stackoverflow.com/questions/34882035/how-to-record-allocations-with-jfr-on-command-line
    #
    # in that case change to: -XX:StartFlightRecording=defaultrecording=true,settings=es-memory-profiling
    if self.java_major_version < 9:
        java_opts = "-XX:+UnlockDiagnosticVMOptions -XX:+UnlockCommercialFeatures -XX:+DebugNonSafepoints " \
                    "-XX:+FlightRecorder " \
                    "-XX:FlightRecorderOptions=disk=true,maxage=0s,maxsize=0,dumponexit=true,dumponexitpath=%s " \
                    "-XX:StartFlightRecording=defaultrecording=true" % log_file
    else:
        java_opts = "-XX:+UnlockDiagnosticVMOptions -XX:+UnlockCommercialFeatures -XX:+DebugNonSafepoints " \
                    "-XX:StartFlightRecording=maxsize=0,maxage=0s,disk=true,dumponexit=true,filename=%s" % log_file
    return {"ES_JAVA_OPTS": java_opts}
def prepare(self, binary):
    """
    Installs Elasticsearch and all plugins, applies their configuration and invokes the post-install hooks.

    :param binary: A mapping from component name to binary. ``binary["elasticsearch"]`` is mandatory, plugin
                   binaries are looked up by plugin name and may be absent.
    :return: A ``NodeConfiguration`` built from the attributes of the Elasticsearch installer.
    """
    if not self.preserve:
        console.info("Rally will delete the benchmark candidate after the benchmark")

    es = self.es_installer
    es.install(binary["elasticsearch"])
    # we need to immediately delete it as plugins may copy their configuration during installation.
    es.delete_pre_bundled_configuration()

    # determine after installation because some variables will depend on the install directory
    target_root_path = es.es_home_path
    provisioner_vars = self._provisioner_variables()

    for config_path in es.config_source_paths:
        self.apply_config(config_path, target_root_path, provisioner_vars)

    for plugin in self.plugin_installers:
        plugin.install(target_root_path, binary.get(plugin.plugin_name))
        for config_path in plugin.config_source_paths:
            self.apply_config(config_path, target_root_path, provisioner_vars)

    # the hooks run in a separate pass, i.e. only after all plugins have been installed
    for plugin in self.plugin_installers:
        # Never let install hooks modify our original provisioner variables and just provide a copy!
        plugin.invoke_install_hook(ProvisioningPhase.post_install, provisioner_vars.copy())

    return NodeConfiguration(es.car, es.node_ip, es.node_name, es.node_root_dir, es.es_home_path,
                             es.node_log_dir, es.data_paths)
def decompress_corpus(archive_path, documents_path, uncompressed_size):
    """
    Decompresses the archive into its own directory and verifies the extracted document file.

    :param archive_path: Path to the compressed archive.
    :param documents_path: Path at which the decompressed document file is expected afterwards.
    :param uncompressed_size: Expected size of the decompressed file in bytes or ``None`` if it is unknown.
    :raises exceptions.DataError: If ``documents_path`` does not exist after decompression or if its size does not
                                  match ``uncompressed_size``.
    """
    if uncompressed_size:
        progress_message = "Decompressing track data from [%s] to [%s] (resulting size: %.2f GB) ... " % \
                           (archive_path, documents_path, convert.bytes_to_gb(uncompressed_size))
    else:
        progress_message = "Decompressing track data from [%s] to [%s] ... " % (archive_path, documents_path)
    console.info(progress_message, end='', flush=True, logger=logger)

    io.decompress(archive_path, io.dirname(archive_path))
    console.println("[OK]")

    if not os.path.isfile(documents_path):
        raise exceptions.DataError("Decompressing [%s] did not create [%s]. Please check with the track author if the compressed "
                                   "archive has been created correctly." % (archive_path, documents_path))

    extracted_bytes = os.path.getsize(documents_path)
    size_is_known = uncompressed_size is not None
    if size_is_known and extracted_bytes != uncompressed_size:
        raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                   (documents_path, extracted_bytes, uncompressed_size))
def receiveMsg_StartEngine(self, msg, sender):
    """
    Handles the request from race control to start the engine.

    For an externally provisioned cluster no nodes are started and the status is advanced to ``cluster_started``
    immediately. Otherwise the message is forwarded to a newly created ``Dispatcher`` actor.

    :param msg: The start message; ``cfg`` and ``external`` are read and ``hosts`` is set on it.
    :param sender: The sending actor, remembered as race control.
    :raises exceptions.LaunchError: If no target hosts are configured.
    """
    self.logger.info("Received signal from race control to start engine.")
    self.race_control = sender
    self.cfg = msg.cfg
    self.car, _ = load_team(self.cfg, msg.external)
    # TODO: This is implicitly set by #load_team() - can we gather this elsewhere?
    self.team_revision = self.cfg.opts("mechanic", "repository.revision")

    # In our startup procedure we first create all mechanics. Only if this succeeds we'll continue.
    hosts = self.cfg.opts("client", "hosts").default
    if len(hosts) == 0:
        raise exceptions.LaunchError("No target hosts are configured.")

    self.externally_provisioned = msg.external
    if not self.externally_provisioned:
        console.info("Preparing for race ...", flush=True)
        self.logger.info("Cluster consisting of %s will be provisioned by Rally.", hosts)
        msg.hosts = hosts
        # Initialize the children array to have the right size to
        # ensure waiting for all responses
        expected_nodes = len(nodes_by_host(to_ip_port(hosts)))
        self.children = [None] * expected_nodes
        self.send(self.createActor(Dispatcher), msg)
        self.status = "starting"
        self.received_responses = []
        return

    self.logger.info("Cluster will not be provisioned by Rally.")
    self.status = "nodes_started"
    self.received_responses = []
    self.on_all_nodes_started()
    self.status = "cluster_started"
def setup(self, sources=False):
    """
    Loads the track and the challenge and creates the race, the metrics store and the race store.

    :param sources: ``True`` for source builds; the distribution version is never derived in that case.
    :raises exceptions.SystemSetupError: If a derived cluster version is below the minimum supported version or if
                                         the track does not provide the requested challenge.
    """
    # to load the track we need to know the correct cluster distribution version. Usually, this value should be set
    # but there are rare cases (external pipeline and user did not specify the distribution version) where we need
    # to derive it ourselves. For source builds we always assume "master"
    needs_derived_version = not (sources or self.cfg.exists("mechanic", "distribution.version"))
    if needs_derived_version:
        distribution_version = mechanic.cluster_distribution_version(self.cfg)
        self.logger.info("Automatically derived distribution version [%s]", distribution_version)
        self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)
        min_es_version = versions.Version.from_string(version.minimum_es_version())
        specified_version = versions.Version.from_string(distribution_version)
        if specified_version < min_es_version:
            raise exceptions.SystemSetupError(f"Cluster version must be at least [{min_es_version}] but was [{distribution_version}]")

    self.current_track = track.load_track(self.cfg)
    self.track_revision = self.cfg.opts("track", "repository.revision", mandatory=False)

    challenge_name = self.cfg.opts("track", "challenge.name")
    self.current_challenge = self.current_track.find_challenge_or_default(challenge_name)
    if self.current_challenge is None:
        message = "Track [{}] does not provide challenge [{}]. List the available tracks with {} list tracks.".format(
            self.current_track.name, challenge_name, PROGRAM_NAME)
        raise exceptions.SystemSetupError(message)

    # show the challenge's user info, if there is any
    user_info = self.current_challenge.user_info
    if user_info:
        console.info(user_info)

    self.race = metrics.create_race(self.cfg, self.current_track, self.current_challenge, self.track_revision)
    self.metrics_store = metrics.metrics_store(self.cfg,
                                               track=self.race.track_name,
                                               challenge=self.race.challenge_name,
                                               read_only=False)
    self.race_store = metrics.race_store(self.cfg)
def from_distribution(cfg):
    """
    Ensures that the requested Elasticsearch distribution is available locally and registers its path in the config.

    The archive is downloaded if it is not present in the distributions directory yet or if the repository requires
    a download every time. The resulting path is stored as ``builder`` / ``candidate.bin.path``.

    :param cfg: The config object. ``source`` / ``distribution.version``, ``source`` / ``distribution.repository``,
                ``system`` / ``root.dir`` and ``source`` / ``distribution.dir`` are read from it.
    :raises exceptions.SystemSetupError: If the version is blank, the repository name is unknown or the download
                                         fails with an HTTP error.
    """
    version = cfg.opts("source", "distribution.version")
    repo_name = cfg.opts("source", "distribution.repository")
    if not version.strip():
        raise exceptions.SystemSetupError("Could not determine version. Please specify the Elasticsearch distribution "
                                          "to download with the command line parameter --distribution-version. "
                                          "E.g. --distribution-version=5.0.0")

    distributions_root = "%s/%s" % (cfg.opts("system", "root.dir"), cfg.opts("source", "distribution.dir"))
    io.ensure_dir(distributions_root)
    distribution_path = "%s/elasticsearch-%s.tar.gz" % (distributions_root, version)

    try:
        repo = distribution_repos[repo_name]
    except KeyError:
        raise exceptions.SystemSetupError("Unknown distribution repository [%s]. Valid values are: [%s]"
                                          % (repo_name, ",".join(distribution_repos)))

    download_url = repo.download_url(version)
    logger.info("Resolved download URL [%s] for version [%s]" % (download_url, version))
    if not os.path.isfile(distribution_path) or repo.must_download:
        try:
            console.info("Downloading Elasticsearch %s ... " % version, logger=logger, flush=True, end="")
            net.download(download_url, distribution_path)
            console.println("[OK]")
        except urllib.error.HTTPError:
            console.println("[FAILED]")
            # use the module's logger (instead of the root logger) like every other log statement in this function
            logger.exception("Cannot download Elasticsearch distribution for version [%s] from [%s]." % (version, download_url))
            raise exceptions.SystemSetupError("Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
                                              "version [%s] is correct." % (download_url, version))
    else:
        logger.info("Skipping download for version [%s]. Found an existing binary locally at [%s]." % (version, distribution_path))

    cfg.add(config.Scope.invocation, "builder", "candidate.bin.path", distribution_path)
def prepare_file_offset_table(data_file_path):
    """
    Creates a file that contains a mapping from line numbers to file offsets for the provided path. This file is used internally by
    #skip_lines(data_file_path, data_file) to speed up line skipping.

    The offset file is stored next to the data file with the suffix ``.offset`` and contains one entry every
    50000 lines.

    :param data_file_path: The path to a text file that is readable by this process.
    """
    offset_file_path = "%s.offset" % data_file_path
    # recreate only if necessary as this can be time-consuming
    if not os.path.exists(offset_file_path) or os.path.getmtime(offset_file_path) < os.path.getmtime(data_file_path):
        console.info("Preparing file offset table for [%s] ... " % data_file_path, end="", flush=True, logger=logger)
        line_number = 0
        # Open both files with an explicit encoding so that the result does not depend on the platform's default
        # encoding.
        with open(offset_file_path, mode="wt", encoding="utf-8") as offset_file:
            with open(data_file_path, mode="rt", encoding="utf-8") as data_file:
                # Deliberately based on readline(): iterating over a text file disables tell().
                while True:
                    line = data_file.readline()
                    if len(line) == 0:
                        break
                    line_number += 1
                    if line_number % 50000 == 0:
                        print("%d;%d" % (line_number, data_file.tell()), file=offset_file)
        console.println("[OK]")
    else:
        logger.info("Skipping creation of file offset table at [%s] as it is still valid." % offset_file_path)
def prepare_file_offset_table(data_file_path):
    """
    Writes ``<data_file_path>.offset``, a file that maps every 50000th line number of the data file to the file
    offset after that line. It is used internally by #skip_lines(data_file_path, data_file) to speed up line skipping.

    :param data_file_path: The path to a text file that is readable by this process.
    :return The number of lines read or ``None`` if it did not have to build the file offset table.
    """
    offset_file_path = "%s.offset" % data_file_path
    # recreate only if necessary as this can be time-consuming
    if os.path.exists(offset_file_path) and os.path.getmtime(offset_file_path) >= os.path.getmtime(data_file_path):
        return None

    console.info("Preparing file offset table for [%s] ... " % data_file_path, end="", flush=True)
    lines_read = 0
    with open(offset_file_path, mode="wt", encoding="utf-8") as offset_file:
        with open(data_file_path, mode="rt", encoding="utf-8") as data_file:
            # readline() returns an empty string only at the end of the file
            while data_file.readline():
                lines_read += 1
                if lines_read % 50000 == 0:
                    offset_file.write("%d;%d\n" % (lines_read, data_file.tell()))
    console.println("[OK]")
    return lines_read
def prepare_file_offset_table(data_file_path):
    """
    Builds the file offset table for the provided data file unless a valid one exists already. The table records
    the file offset after every 50000th line and is used internally by #skip_lines(data_file_path, data_file)
    to speed up line skipping.

    :param data_file_path: The path to a text file that is readable by this process.
    :return The number of lines read or ``None`` if it did not have to build the file offset table.
    """
    file_offset_table = FileOffsetTable.create_for_data_file(data_file_path)
    if file_offset_table.is_valid():
        return None

    console.info("Preparing file offset table for [%s] ... " % data_file_path, end="", flush=True)
    lines_read = 0
    with file_offset_table, open(data_file_path, mode="rt", encoding="utf-8") as data_file:
        # readline() returns an empty string only at the end of the file
        while data_file.readline():
            lines_read += 1
            if lines_read % 50000 == 0:
                file_offset_table.add_offset(lines_read, data_file.tell())
    console.println("[OK]")
    return lines_read
def decompress(data_set_path, expected_size_in_bytes):
    """
    Decompresses the data set unless the decompressed file exists already with the expected size.

    Note that the file is always decompressed again if ``expected_size_in_bytes`` is ``None``.

    :param data_set_path: Path to the compressed data set. The decompressed file is expected at the same path
                          without the extension.
    :param expected_size_in_bytes: Expected size of the decompressed file in bytes or ``None`` if it is unknown.
    :return: A tuple of the path to the decompressed file and a boolean that is ``True`` if the data set has been
             decompressed by this call.
    :raises exceptions.DataError: If the size of the extracted file does not match ``expected_size_in_bytes``.
    """
    # we assume that track data are always compressed and try to decompress them before running the benchmark
    basename, _ = io.splitext(data_set_path)
    decompressed = False
    if not os.path.isfile(basename) or os.path.getsize(basename) != expected_size_in_bytes:
        decompressed = True
        # Rely on the parameter here. The free name ``type`` resolves to the builtin unless an enclosing scope
        # happens to shadow it.
        if expected_size_in_bytes:
            console.info("Decompressing track data from [%s] to [%s] (resulting size: %.2f GB) ... " %
                         (data_set_path, basename, convert.bytes_to_gb(expected_size_in_bytes)),
                         end='', flush=True, logger=logger)
        else:
            console.info("Decompressing track data from [%s] to [%s] ... " % (data_set_path, basename), end='',
                         flush=True, logger=logger)
        io.decompress(data_set_path, io.dirname(data_set_path))
        console.println("[OK]")
        extracted_bytes = os.path.getsize(basename)
        if expected_size_in_bytes is not None and extracted_bytes != expected_size_in_bytes:
            raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                       (basename, extracted_bytes, expected_size_in_bytes))
    return basename, decompressed
def stop(raise_errors=True):
    """
    Shuts down the running actor system and waits until it has terminated.

    :param raise_errors: If ``True``, errors during shutdown are raised again and the process exits with status 1
                         if no actor system is running. If ``False``, both cases are ignored silently.
    """
    if not actor.actor_system_already_running():
        if raise_errors:
            console.error("Could not shut down actor system: Actor system is not running.")
            sys.exit(1)
        return

    try:
        # TheSpian writes the following warning upon start (at least) on Mac OS X:
        #
        # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
        # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
        #
        # Therefore, we will not show warnings but only errors.
        logging.basicConfig(level=logging.ERROR)
        actor.bootstrap_actor_system(try_join=True).shutdown()
        # await termination...
        console.info("Shutting down actor system.", end="", flush=True)
        while actor.actor_system_already_running():
            console.println(".", end="", flush=True)
            time.sleep(1)
        console.println(" [OK]")
    except BaseException:
        console.error("Could not shut down actor system.")
        if raise_errors:
            # raise again so user can see the error
            raise
def configure_logging(cfg):
    """
    Sets up a fresh root logger configuration with exactly one handler (log file or stdout).

    The directory of the log file is created in any case and registered in the config as ``system`` / ``log.dir``.
    When ``system`` / ``logging.output`` is ``"file"``, an existing log file is moved to a backup file before the
    new file handler is created.

    :param cfg: The config object. ``system`` / ``logging.output`` is read, ``system`` / ``log.dir`` is added.
    """
    # Even if we don't log to a file, other parts of the application rely on this path to exist -> enforce
    log_file = log_file_path(cfg)
    log_dir = os.path.dirname(log_file)
    io.ensure_dir(log_dir)
    cfg.add(config.Scope.application, "system", "log.dir", log_dir)

    logging_output = cfg.opts("system", "logging.output")
    if logging_output == "file":
        console.info("Writing logs to %s" % log_file)
        # there is an old log file lying around -> backup
        if os.path.exists(log_file):
            os.rename(log_file, "%s-bak-%d.log" % (log_file, int(os.path.getctime(log_file))))
        ch = logging.FileHandler(filename=log_file, mode="a")
    else:
        ch = logging.StreamHandler(stream=sys.stdout)

    log_level = logging.INFO
    ch.setLevel(log_level)
    # "%(msecs)03d" pads the milliseconds to three digits. An unpadded value such as ",5" for 5 ms is ambiguous
    # to read.
    formatter = logging.Formatter("%(asctime)s,%(msecs)03d %(name)s %(levelname)s %(message)s",
                                  datefmt="%Y-%m-%d %H:%M:%S")
    # timestamps are emitted in UTC
    formatter.converter = time.gmtime
    ch.setFormatter(formatter)

    # Remove all handlers associated with the root logger object so we can start over with an entirely fresh log configuration
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)

    logging.root.addHandler(ch)
    logging.getLogger("elasticsearch").setLevel(logging.WARN)
def stop(raise_errors=True):
    """
    Joins the running actor system, requests its shutdown and polls once per second until it is gone.

    :param raise_errors: If ``True``, errors during shutdown are raised again and the process exits with status 1
                         if no actor system is running. If ``False``, both cases are ignored silently.
    """
    system_is_running = actor.actor_system_already_running()
    if not system_is_running:
        if raise_errors:
            console.error("Could not shut down actor system: Actor system is not running.")
            sys.exit(1)
        return

    # noinspection PyBroadException
    try:
        # TheSpian writes the following warning upon start (at least) on Mac OS X:
        #
        # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
        # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
        #
        # Therefore, we will not show warnings but only errors.
        logging.basicConfig(level=logging.ERROR)
        running_system = actor.bootstrap_actor_system(try_join=True)
        running_system.shutdown()
        # await termination...
        console.info("Shutting down actor system.", end="", flush=True)
        while actor.actor_system_already_running():
            console.println(".", end="", flush=True)
            time.sleep(1)
        console.println(" [OK]")
    except BaseException:
        console.error("Could not shut down actor system.")
        if not raise_errors:
            return
        # raise again so user can see the error
        raise
def receiveMsg_StartEngine(self, msg, sender):
    """
    Handles the request from race control to start the engine.

    Opens the metrics store, loads the team and then either forwards the message to a single
    ``NodeMechanicActor`` (externally provisioned cluster) or to a newly created ``Dispatcher`` actor.

    :param msg: The start message; ``cfg``, ``open_metrics_context`` and ``external`` are read from it.
    :param sender: The sending actor, remembered as race control.
    :raises exceptions.LaunchError: If no target hosts are configured.
    """
    self.logger.info("Received signal from race control to start engine.")
    self.race_control = sender
    self.cfg = msg.cfg

    store_class = metrics.metrics_store_class(self.cfg)
    self.metrics_store = store_class(self.cfg)
    self.metrics_store.open(ctx=msg.open_metrics_context)
    self.car, _ = load_team(self.cfg, msg.external)

    # In our startup procedure we first create all mechanics. Only if this succeeds we'll continue.
    hosts = self.cfg.opts("client", "hosts").default
    if len(hosts) == 0:
        raise exceptions.LaunchError("No target hosts are configured.")

    if not msg.external:
        console.info("Preparing for race ...", flush=True)
        self.logger.info("Cluster consisting of %s will be provisioned by Rally.", hosts)
        msg.hosts = hosts
        # Initialize the children array to have the right size to
        # ensure waiting for all responses
        expected_nodes = len(nodes_by_host(to_ip_port(hosts)))
        self.children = [None] * expected_nodes
        self.send(self.createActor(Dispatcher), msg)
    else:
        self.logger.info("Cluster will not be provisioned by Rally.")
        # just create one actor for this special case and run it on the coordinator node (i.e. here)
        node_mechanic = self.createActor(NodeMechanicActor, targetActorRequirements={"coordinator": True})
        self.children.append(node_mechanic)
        self.send(node_mechanic, msg.for_nodes(ip=hosts))

    self.status = "starting"
    self.received_responses = []
def prepare_file_offset_table(data_file_path):
    """
    Creates a file that contains a mapping from line numbers to file offsets for the provided path. This file is used internally by
    #skip_lines(data_file_path, data_file) to speed up line skipping.

    The offset file is stored next to the data file with the suffix ``.offset`` and contains one entry every
    50000 lines.

    :param data_file_path: The path to a text file that is readable by this process.
    :return The number of lines read or ``None`` if it did not have to build the file offset table.
    """
    offset_file_path = "%s.offset" % data_file_path
    # recreate only if necessary as this can be time-consuming
    if not os.path.exists(offset_file_path) or os.path.getmtime(offset_file_path) < os.path.getmtime(data_file_path):
        console.info("Preparing file offset table for [%s] ... " % data_file_path, end="", flush=True, logger=logger)
        line_number = 0
        # Open both files with an explicit encoding so that the result does not depend on the platform's default
        # encoding.
        with open(offset_file_path, mode="wt", encoding="utf-8") as offset_file:
            with open(data_file_path, mode="rt", encoding="utf-8") as data_file:
                # Deliberately based on readline(): iterating over a text file disables tell().
                while True:
                    line = data_file.readline()
                    if len(line) == 0:
                        break
                    line_number += 1
                    if line_number % 50000 == 0:
                        print("%d;%d" % (line_number, data_file.tell()), file=offset_file)
        console.println("[OK]")
        return line_number
    else:
        logger.info("Skipping creation of file offset table at [%s] as it is still valid." % offset_file_path)
        return None
def create_track(cfg):
    """
    Connects to the target cluster, extracts mappings and corpora for the configured indices and renders a
    ``track.json`` into ``<output path>/<track name>``.

    :param cfg: The config object.
    :raises RuntimeError: If no index could be extracted.
    """
    logger = logging.getLogger(__name__)

    track_name = cfg.opts("track", "track.name")
    indices = cfg.opts("generator", "indices")
    root_path = cfg.opts("generator", "output.path")
    target_hosts = cfg.opts("client", "hosts")
    client_options = cfg.opts("client", "options")

    logger.info("Creating track [%s] matching indices [%s]", track_name, indices)

    # only the default cluster is used
    default_hosts = target_hosts.all_hosts[opts.TargetHosts.DEFAULT]
    default_client_options = client_options.all_client_options[opts.TargetHosts.DEFAULT]
    client = EsClientFactory(hosts=default_hosts, client_options=default_client_options).create()

    info = client.info()
    console.info(f"Connected to Elasticsearch cluster [{info['name']}] version [{info['version']['number']}].\n", logger=logger)

    normalized_root = io.normalize_path(root_path)
    output_path = os.path.abspath(os.path.join(normalized_root, track_name))
    io.ensure_dir(output_path)

    indices, corpora = extract_mappings_and_corpora(client, output_path, indices)
    if len(indices) == 0:
        raise RuntimeError("Failed to extract any indices for track!")

    template_vars = {
        "track_name": track_name,
        "indices": indices,
        "corpora": corpora
    }

    track_path = os.path.join(output_path, "track.json")
    templates_path = os.path.join(cfg.opts("node", "rally.root"), "resources")
    process_template(templates_path, "track.json.j2", template_vars, track_path)

    console.println("")
    console.info(f"Track {track_name} has been created. Run it with: {PROGRAM_NAME} --track-path={output_path}")
def instrument_env(self, car, candidate_id):
    """
    Announces the GC log file for this candidate and returns the matching Java options.

    :param car: The car; its ``name`` becomes part of the log file name.
    :param candidate_id: Identifier that becomes part of the log file name.
    :return: The result of ``self.java_opts`` for the GC log file.
    """
    io.ensure_dir(self.log_root)
    gc_log_path = "%s/%s-%s.gc.log" % (self.log_root, car.name, candidate_id)
    announcement = "%s: Writing GC log to [%s]" % (self.human_name, gc_log_path)
    console.info(announcement, logger=logger)
    return self.java_opts(gc_log_path)
def instrument_env(self, car, candidate_id):
    """
    Announces the JIT compiler log file for this candidate and returns the JVM options that write it.

    :param car: The car; its ``name`` becomes part of the log file name.
    :param candidate_id: Identifier that becomes part of the log file name.
    :return: A dict with the single key ``ES_JAVA_OPTS``.
    """
    challenge_root = self.cfg.opts("system", "challenge.root.dir")
    metrics_log_dir = self.cfg.opts("benchmarks", "metrics.log.dir")
    log_root = "%s/%s" % (challenge_root, metrics_log_dir)
    io.ensure_dir(log_root)

    log_file = "%s/%s-%s.jit.log" % (log_root, car.name, candidate_id)
    console.info("%s: Writing JIT compiler log to [%s]" % (self.human_name, log_file), logger=logger)

    java_opts = "-XX:+UnlockDiagnosticVMOptions -XX:+TraceClassLoading -XX:+LogCompilation " \
                "-XX:LogFile=%s -XX:+PrintAssembly" % log_file
    return {"ES_JAVA_OPTS": java_opts}
def start(args):
    """
    Starts an actor system on this node.

    :param args: Object with the attributes ``node_ip`` and ``coordinator_ip``.
    :raises exceptions.RallyError: If an actor system appears to be running already.
    """
    already_running = actor.actor_system_already_running()
    if already_running:
        raise exceptions.RallyError("An actor system appears to be already running.")

    actor.bootstrap_actor_system(local_ip=args.node_ip, coordinator_ip=args.coordinator_ip)
    success_message = "Successfully started actor system on node [%s] with coordinator node IP [%s]." % \
                      (args.node_ip, args.coordinator_ip)
    console.info(success_message)
def _install_binary(self, binaries):
    """
    Installs Elasticsearch and afterwards all plugins into the Elasticsearch home directory.

    :param binaries: A mapping from component name to binary. ``binaries["elasticsearch"]`` is mandatory, plugin
                     binaries are looked up by plugin name and may be absent.
    """
    if not self.preserve:
        console.info("Rally will delete the benchmark candidate after the benchmark")

    es = self.es_installer
    es.install(binaries["elasticsearch"])
    # we need to immediately delete it as plugins may copy their configuration during installation.
    es.delete_pre_bundled_configuration()

    for plugin in self.plugin_installers:
        plugin_binary = binaries.get(plugin.plugin_name)
        plugin.install(es.es_home_path, plugin_binary)
def instrument_env(self, car, candidate_id):
    """
    Announces the GC log file for this candidate and returns the JVM options that write it.

    :param car: The car; its ``name`` becomes part of the log file name.
    :param candidate_id: Identifier that becomes part of the log file name.
    :return: A dict with the single key ``ES_JAVA_OPTS``.
    """
    challenge_root = self.cfg.opts("system", "challenge.root.dir")
    metrics_log_dir = self.cfg.opts("benchmarks", "metrics.log.dir")
    log_root = "%s/%s" % (challenge_root, metrics_log_dir)
    io.ensure_dir(log_root)

    log_file = "%s/%s-%s.gc.log" % (log_root, car.name, candidate_id)
    console.info("%s: Writing GC log to [%s]" % (self.human_name, log_file), logger=logger)

    # TODO dm: These options change in JDK 9!
    java_opts = "-Xloggc:%s -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps " \
                "-XX:+PrintGCApplicationStoppedTime -XX:+PrintGCApplicationConcurrentTime -XX:+PrintTenuringDistribution" % log_file
    return {"ES_JAVA_OPTS": java_opts}
def configure_connection_params(arg_parser, args, cfg):
    """
    Stores target hosts and client options in the config and checks that both define the same keys.

    :param arg_parser: The argument parser; its ``error`` method is called if the keys do not match.
    :param args: Parsed arguments providing ``target_hosts`` and ``client_options``.
    :param cfg: The config object; ``client`` / ``hosts`` and ``client`` / ``options`` are added to it.
    """
    # Also needed by mechanic (-> telemetry) - duplicate by module?
    target_hosts = opts.TargetHosts(args.target_hosts)
    cfg.add(config.Scope.applicationOverride, "client", "hosts", target_hosts)

    client_options = opts.ClientOptions(args.client_options, target_hosts=target_hosts)
    cfg.add(config.Scope.applicationOverride, "client", "options", client_options)

    has_explicit_timeout = "timeout" in client_options.default
    if not has_explicit_timeout:
        console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")

    host_keys = list(target_hosts.all_hosts)
    option_keys = list(client_options.all_client_options)
    if host_keys != option_keys:
        arg_parser.error("--target-hosts and --client-options must define the same keys for multi cluster setups.")
def instrument_env(self, car, candidate_id):
    """
    Announces the JIT compiler log file for this candidate and returns the JVM options that write it.

    :param car: The car; its ``name`` becomes part of the log file name.
    :param candidate_id: Identifier that becomes part of the log file name.
    :return: A dict with the single key ``ES_JAVA_OPTS``.
    """
    io.ensure_dir(self.log_root)
    jit_log_path = "%s/%s-%s.jit.log" % (self.log_root, car.name, candidate_id)
    announcement = "%s: Writing JIT compiler log to [%s]" % (self.human_name, jit_log_path)
    console.info(announcement, logger=logger)

    java_opts = "-XX:+UnlockDiagnosticVMOptions -XX:+TraceClassLoading -XX:+LogCompilation " \
                "-XX:LogFile=%s -XX:+PrintAssembly" % jit_log_path
    return {"ES_JAVA_OPTS": java_opts}
def with_actor_system(runnable, cfg):
    """
    Runs ``runnable(cfg)`` with a bootstrapped actor system and shuts the actor system down afterwards unless it
    was running already before this call.

    :param runnable: A callable that is invoked with ``cfg`` as its only argument.
    :param cfg: The config object; ``system`` / ``remote.benchmarking.supported`` is added to it.
    :raises RuntimeError: If the actor system cannot be bootstrapped for any reason other than a missing external
                          socket address.
    """
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]" % str(already_running))
    try:
        actors = actor.bootstrap_actor_system(try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system", "remote.benchmarking.supported", already_running)
    except RuntimeError as e:
        logger.exception("Could not bootstrap actor system.")
        # only this specific error message triggers the fallback to the offline actor system
        if str(e) == "Unable to determine valid external socket address.":
            console.warn("Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                         logger=logger)
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shutdown the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            # retry the shutdown until it completes, times out or the user has interrupted it twice
            while not shutdown_complete and times_interrupted < 2:
                try:
                    logger.info("Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    # poll once per second, for at most 15 seconds
                    while actor.actor_system_already_running() and timeout > 0:
                        logger.info("Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                    if timeout > 0:
                        shutdown_complete = True
                        logger.info("Shutdown completed.")
                    else:
                        logger.warning("Shutdown timed out. Actor system is still running.")
                        break
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning("User interrupted shutdown of internal actor system.")
                    console.info("Please wait a moment for Rally's internal components to shutdown.")
            # incomplete shutdown: distinguish between user interruption and timeout
            if not shutdown_complete and times_interrupted > 0:
                logger.warning("Terminating after user has interrupted actor system shutdown explicitly for [%d] times."
                               % times_interrupted)
                console.println("")
                console.warn("Terminating now at the risk of leaving child processes behind.")
                console.println("")
                console.warn("The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
            elif not shutdown_complete:
                console.warn("Could not terminate all internal processes within timeout. Please check and force-terminate all Rally processes.")
def download(cfg, url, local_path, size_in_bytes):
    """
    Downloads ``url`` to ``local_path`` unless a matching file is already present.

    :param cfg: The config object; ``system`` / ``offline.mode`` is read from it.
    :param url: The URL to download from.
    :param local_path: The path of the local target file.
    :param size_in_bytes: Expected file size in bytes or ``None`` if it is unknown.
    :return: ``False`` if the download has been skipped because the file exists already (with the expected size if
             it is known), ``True`` otherwise.
    :raises exceptions.SystemSetupError: If the file is not present locally afterwards.
    :raises exceptions.DataError: If the size of the local file does not match ``size_in_bytes``.
    """
    offline = cfg.opts("system", "offline.mode")
    file_exists = os.path.isfile(local_path)

    # ensure we only skip the download if the file size also matches our expectation
    if file_exists and (size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes):
        logger.info("[%s] already exists locally. Skipping download." % local_path)
        return False

    if not offline:
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
                progress_message = "Downloading data from [%s] (%s MB) to [%s] ... " % (url, size_in_mb, local_path)
            else:
                progress_message = "Downloading data from [%s] to [%s] ... " % (url, local_path)
            # ensure output appears immediately
            console.info(progress_message, end='', flush=True, logger=logger)
            net.download(url, local_path, size_in_bytes)
            console.println("[OK]")
        except urllib.error.URLError:
            # only log here; the checks below report the problem to the caller
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # file must exist at this point -> verify
    if not os.path.isfile(local_path):
        if offline:
            raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
        raise exceptions.SystemSetupError("Cannot download from %s to %s. Please verify that data are available at %s and "
                                          "check your internet connection." % (url, local_path, url))

    actual_size = os.path.getsize(local_path)
    size_is_known = size_in_bytes is not None
    if size_is_known and actual_size != size_in_bytes:
        raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                   (local_path, actual_size, size_in_bytes))

    return True
def _install_binary(self, binary):
    """
    Decompress ``binary`` into ``self.install_dir`` and remember the extracted directory.

    :param binary: Path of the archive that is passed to ``io.decompress``.

    Sets ``self.binary_path`` to the first path matching ``<install_dir>/elasticsearch*``.
    """
    target_dir = self.install_dir
    logger.info("Preparing candidate locally in [%s]." % target_dir)
    io.ensure_dir(target_dir)
    if not self.preserve:
        console.info("Rally will delete the benchmark candidate after the benchmark")

    logger.info("Unzipping %s to %s" % (binary, target_dir))
    io.decompress(binary, target_dir)
    extracted = glob.glob("%s/elasticsearch*" % target_dir)
    self.binary_path = extracted[0]
def receiveMsg_EngineStarted(self, msg, sender):
    """
    Handle the notification that the engine has been started.

    Copies the meta info carried by ``msg`` to the metrics store and to the race, tells the user
    what is being benchmarked and then calls ``self.run()``.

    :param msg: Message providing ``system_meta_info`` and ``cluster_meta_info``.
    :param sender: Sender of the message (not used here).
    """
    logger.info("Mechanic has started engine successfully.")
    self.metrics_store.meta_info = msg.system_meta_info
    self.race.cluster = msg.cluster_meta_info

    race = self.race
    summary = (race.track_name, race.challenge_name, race.car, race.cluster.distribution_version)
    console.info("Racing on track [%s], challenge [%s] and car %s with version [%s].\n" % summary)
    # start running we assume that each race has at least one lap
    self.run()
def receiveMsg_StopEngine(self, msg, sender):
    """
    Stop the cluster via the cluster launcher and forward ``StopNodes`` to the children.

    :param msg: The received message (not inspected here).
    :param sender: Sender of the message; passed on to ``send_to_children_and_transition``.
    """
    cluster = self.cluster
    if cluster.preserve:
        # NOTE(review): the message contains no path although it reads "at (...)" - confirm whether a placeholder is missing
        console.info("Keeping benchmark candidate including index at (may need several GB).")
    # detach from cluster and gather all system metrics
    self.cluster_launcher.stop(cluster)
    # A launch error or a cancellation by the user can bring us here from various states,
    # hence we do not check for a specific one.
    self.send_to_children_and_transition(sender, StopNodes(), [], "cluster_stopping")
def sweep(self):
    """
    Archive the log directory as a zip file in the invocation root and delete the log directory afterwards.

    The archive is named ``logs-<track>-<challenge>-<car>.zip``.
    """
    opts = self.cfg.opts
    invocation_root = opts("system", "invocation.root.dir")
    # track, challenge and car (in this order) make up the archive name
    name_parts = tuple(opts("benchmarks", key) for key in ("track", "challenge", "car"))

    log_root = paths.Paths(self.cfg).log_root()
    archive_path = "%s/logs-%s-%s-%s.zip" % ((invocation_root,) + name_parts)
    io.compress(log_root, archive_path)
    console.println("")
    console.info("Archiving logs in %s" % archive_path)
    shutil.rmtree(log_root)
def attach_to_node(self, node):
    """
    Start ``perf stat`` for the process of ``node`` and redirect its output to a per-node log file.

    The opened log file, the ``perf`` process and the node are stored in ``self.log``, ``self.process``
    and ``self.node``; nothing is closed in this method.

    :param node: Node providing ``node_name`` and ``process.pid``.
    """
    challenge_root = self.cfg.opts("system", "challenge.root.dir")
    metrics_log_dir = self.cfg.opts("benchmarks", "metrics.log.dir")
    log_root = "%s/%s" % (challenge_root, metrics_log_dir)
    io.ensure_dir(log_root)
    log_file = "%s/%s.perf.log" % (log_root, node.node_name)

    console.info("%s: Writing perf logs to [%s]" % (self.human_name, log_file), logger=logger)

    self.log = open(log_file, "wb")
    perf_command = ["perf", "stat", "-p %s" % node.process.pid]
    # stderr is merged into stdout so everything ends up in the log file
    self.process = subprocess.Popen(perf_command,
                                    stdout=self.log,
                                    stderr=subprocess.STDOUT,
                                    stdin=subprocess.DEVNULL)
    self.node = node
def _install_binary(self):
    """
    Decompress the candidate binary into the install directory and publish the extracted path in the config.

    The archive path is read from ``builder`` / ``candidate.bin.path``; the first path matching
    ``<install_dir>/elasticsearch*`` is stored as ``provisioning`` / ``local.binary.path``.
    """
    archive = self._config.opts("builder", "candidate.bin.path")
    target_dir = self._install_dir()
    logger.info("Preparing candidate locally in [%s]." % target_dir)
    io.ensure_dir(target_dir)
    if not self.preserve:
        console.info("Rally will delete the benchmark candidate after the benchmark")

    logger.info("Unzipping %s to %s" % (archive, target_dir))
    io.decompress(archive, target_dir)
    extracted = glob.glob("%s/elasticsearch*" % target_dir)
    self._config.add(config.Scope.benchmark, "provisioning", "local.binary.path", extracted[0])
def start(args):
    """
    Bootstrap an actor system on this node.

    :param args: Parsed arguments providing ``node_ip`` and ``coordinator_ip``.
    :raises exceptions.RallyError: If an actor system appears to be running already.
    """
    if actor.actor_system_already_running():
        raise exceptions.RallyError("An actor system appears to be already running.")

    # Thespian writes the following warning upon start (at least) on Mac OS X:
    #
    # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
    # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
    #
    # Therefore, we will not show warnings but only errors.
    logging.basicConfig(level=logging.ERROR)

    node_ip = args.node_ip
    coordinator_ip = args.coordinator_ip
    actor.bootstrap_actor_system(local_ip=node_ip, coordinator_ip=coordinator_ip)
    console.info("Successfully started actor system on node [%s] with coordinator node IP [%s]." % (node_ip, coordinator_ip))
def instrument_env(self, car, candidate_id):
    """
    Build the environment settings that enable a Java flight recording.

    :param car: Object providing ``name``; used for the name of the recording file.
    :param candidate_id: Second part of the name of the recording file.
    :return: A dict with the single key ``ES_JAVA_OPTS``.
    """
    challenge_root = self.cfg.opts("system", "challenge.root.dir")
    metrics_log_dir = self.cfg.opts("benchmarks", "metrics.log.dir")
    log_root = "%s/%s" % (challenge_root, metrics_log_dir)
    io.ensure_dir(log_root)
    log_file = "%s/%s-%s.jfr" % (log_root, car.name, candidate_id)

    console.info("%s: Writing flight recording to [%s]" % (self.human_name, log_file), logger=logger)

    # this is more robust in case we want to use custom settings
    # see http://stackoverflow.com/questions/34882035/how-to-record-allocations-with-jfr-on-command-line
    #
    # in that case change to: -XX:StartFlightRecording=defaultrecording=true,settings=es-memory-profiling
    java_opts = ("-XX:+UnlockDiagnosticVMOptions -XX:+UnlockCommercialFeatures -XX:+DebugNonSafepoints -XX:+FlightRecorder "
                 "-XX:FlightRecorderOptions=disk=true,maxage=0s,maxsize=0,dumponexit=true,dumponexitpath=%s "
                 "-XX:StartFlightRecording=defaultrecording=true" % log_file)
    return {"ES_JAVA_OPTS": java_opts}
def cleanup(self):
    """
    Delete the install directory and all configured data paths unless ``self.preserve`` is set.

    Data paths are read from ``provisioning`` / ``local.data.paths``; a value of ``None`` means there is nothing to delete.
    """
    install_dir = self._install_dir()
    if self.preserve:
        logger.info("Preserving benchmark candidate installation at [%s]." % install_dir)
        console.info("Keeping benchmark candidate including index at [%s] (will need several GB)." % install_dir)
        return

    logger.info("Wiping benchmark candidate installation at [%s]." % install_dir)
    if os.path.exists(install_dir):
        shutil.rmtree(install_dir)

    data_paths = self._config.opts("provisioning", "local.data.paths")
    if data_paths is None:
        return
    for data_path in data_paths:
        if os.path.exists(data_path):
            shutil.rmtree(data_path)
def from_sources(remote_url, src_dir, revision, gradle, java_home, log_dir, plugins, src_config, build=True):
    """
    Fetch the sources of Elasticsearch and of all non-core plugins, optionally build them and resolve the binaries.

    :param remote_url: Remote repository URL for Elasticsearch.
    :param src_dir: Root directory for all source checkouts.
    :param revision: Revision specification; parsed by ``extract_revisions``.
    :param gradle: Passed through to ``Builder``.
    :param java_home: Passed through to ``Builder``.
    :param log_dir: Passed through to ``Builder``.
    :param plugins: Plugins providing ``name`` and ``core_plugin``. Iterated several times.
    :param src_config: Source-related configuration; read via ``config_value``.
    :param build: If ``True`` the sources are built and progress is shown on the console.
    :return: A dict that maps "elasticsearch" and every plugin name to the resolved binary.
    :raises exceptions.SystemSetupError: If no revision can be determined for Elasticsearch or for a non-core plugin.
    """
    if build:
        console.info("Preparing for race ...", end="", flush=True)
    try:
        revisions = extract_revisions(revision)
        es_src_dir = os.path.join(src_dir, config_value(src_config, "elasticsearch.src.subdir"))
        try:
            es_revision = revisions["elasticsearch"]
        except KeyError:
            raise exceptions.SystemSetupError("No revision specified for Elasticsearch in [%s]." % revision)
        SourceRepository("Elasticsearch", remote_url, es_src_dir).fetch(es_revision)

        # core plugins are treated specially (built via e.g. :plugins:analysis-icu:assemble) and are not fetched separately
        for plugin in plugins:
            if plugin.core_plugin:
                continue
            repo_url = config_value(src_config, "plugin.%s.remote.repo.url" % plugin.name)
            checkout_dir = os.path.join(src_dir, config_value(src_config, "plugin.%s.src.subdir" % plugin.name))
            try:
                plugin_revision = revisions[plugin.name]
            except KeyError:
                # maybe we can use the catch-all revision (only if it's not a git revision)
                plugin_revision = revisions.get("all")
                if not plugin_revision or SourceRepository.is_commit_hash(plugin_revision):
                    raise exceptions.SystemSetupError("No revision specified for plugin [%s] in [%s]." % (plugin.name, revision))
                logger.info("Revision for [%s] is not explicitly defined. Using catch-all revision [%s]."
                            % (plugin.name, plugin_revision))
            SourceRepository(plugin.name, repo_url, checkout_dir).fetch(plugin_revision)

        if build:
            builder = Builder(es_src_dir, gradle, java_home, log_dir)
            builder.build([CLEAN_TASK, ASSEMBLE_TASK])
            for plugin in plugins:
                task = (":plugins:%s:assemble" % plugin.name if plugin.core_plugin
                        else config_value(src_config, "plugin.%s.build.task" % plugin.name))
                builder.build([task])
            console.println(" [OK]")

        binaries = {"elasticsearch": resolve_es_binary(es_src_dir)}
        for plugin in plugins:
            if plugin.core_plugin:
                plugin_binary = resolve_core_plugin_binary(plugin.name, es_src_dir)
            else:
                plugin_binary = resolve_plugin_binary(plugin.name, src_dir, src_config)
            binaries[plugin.name] = plugin_binary
        return binaries
    except BaseException:
        # finish the progress line that was started above before propagating the problem
        if build:
            console.println(" [FAILED]")
        raise
def after_lap(self, lap):
    """
    Summarize the lap that has just finished and print its lap time.

    Does nothing unless more than one lap is configured. For all but the last lap an ETA is printed
    which extrapolates the accumulated lap times to the remaining laps.

    :param lap: Number of the lap that has just finished.
    """
    if not self.laps > 1:
        return

    lap_time = self.lap_timer.split_time() - self.lap_times
    self.lap_times += lap_time
    hl, ml, sl = convert.seconds_to_hour_minute_seconds(lap_time)
    reporter.summarize(self.metrics_store, self.cfg, track=self.track, lap=lap)
    console.println("")

    if lap < self.laps:
        # average lap time so far multiplied by the number of outstanding laps
        remaining = (self.laps - lap) * self.lap_times / lap
        hr, mr, sr = convert.seconds_to_hour_minute_seconds(remaining)
        lap_message = "Lap time %02d:%02d:%02d (ETA: %02d:%02d:%02d)" % (hl, ml, sl, hr, mr, sr)
    else:
        lap_message = "Lap time %02d:%02d:%02d" % (hl, ml, sl)
    console.info(lap_message, logger=logger)
    console.println("")
def from_sources(cfg, build=True):
    """
    Fetch the sources, optionally build them and add the binary to the config.

    :param cfg: Config object; passed to ``Builder`` and ``SourceRepository``.
    :param build: If ``True`` the sources are built and progress is shown on the console.
    """
    if build:
        console.info("Preparing for race ...", end="", flush=True)
    try:
        source_builder = Builder(cfg)
        SourceRepository(cfg).fetch()
        if not build:
            source_builder.add_binary_to_config()
        else:
            source_builder.build()
            source_builder.add_binary_to_config()
            console.println(" [OK]")
    except BaseException:
        # finish the progress line that was started above before propagating the problem
        if build:
            console.println(" [FAILED]")
        raise
def after_lap(self):
    """
    Record and summarize the results of the current lap and print its lap time.

    Apart from a log statement nothing happens unless the race has more than one lap. For all but the
    last lap an ETA is printed which extrapolates the accumulated lap times to the remaining laps.
    """
    race = self.race
    logger.info("Finished lap [%d/%d]" % (self.current_lap, race.total_laps))
    if not race.total_laps > 1:
        return

    lap_time = self.lap_timer.split_time() - self.lap_times
    self.lap_times += lap_time
    hl, ml, sl = convert.seconds_to_hour_minute_seconds(lap_time)

    lap_results = reporter.calculate_results(self.metrics_store, self.race, self.current_lap)
    self.race.add_lap_results(lap_results)
    reporter.summarize(self.race, self.cfg, lap=self.current_lap)
    console.println("")

    if self.current_lap < self.race.total_laps:
        # average lap time so far multiplied by the number of outstanding laps
        remaining = (self.race.total_laps - self.current_lap) * self.lap_times / self.current_lap
        hr, mr, sr = convert.seconds_to_hour_minute_seconds(remaining)
        lap_message = "Lap time %02d:%02d:%02d (ETA: %02d:%02d:%02d)" % (hl, ml, sl, hr, mr, sr)
    else:
        lap_message = "Lap time %02d:%02d:%02d" % (hl, ml, sl)
    console.info(lap_message, logger=logger)
    console.println("")
def cleanup(preserve, install_dir, data_paths):
    """
    Delete all data paths and the install directory unless the installation should be preserved.

    Deletion is best effort: an ``OSError`` is logged and the affected path is skipped.

    :param preserve: If truthy nothing is deleted.
    :param install_dir: Install directory of the benchmark candidate.
    :param data_paths: Iterable of data paths; they are removed before the install directory.
    """
    if preserve:
        logger.info("Preserving benchmark candidate installation at [%s]." % install_dir)
        console.info("Keeping benchmark candidate including index at [%s] (will need several GB)." % install_dir)
        return

    def wipe(path):
        # remove the tree below path if it exists; failures are logged and not propagated
        if not os.path.exists(path):
            return
        try:
            shutil.rmtree(path)
        except OSError:
            logger.exception("Could not delete [%s]. Skipping..." % path)

    logger.info("Wiping benchmark candidate installation at [%s]." % install_dir)
    for data_path in data_paths:
        wipe(data_path)
    wipe(install_dir)
def download(cfg, url, local_path, size_in_bytes):
    """
    Download ``url`` to ``local_path`` unless a suitable file is already present.

    :param cfg: Config object; only the option ``system`` / ``offline.mode`` is read.
    :param url: Remote location of the file.
    :param local_path: Target path on the local file system.
    :param size_in_bytes: Expected file size in bytes or ``None`` to disable all size checks.
    :return: ``False`` if the download was skipped because of an existing local file, ``True`` otherwise.
    :raises exceptions.SystemSetupError: If the file does not exist locally afterwards.
    :raises exceptions.DataError: If the local file does not have the expected size.
    """
    offline = cfg.opts("system", "offline.mode")
    already_present = os.path.isfile(local_path)

    def size_matches():
        # without an expected size every file is accepted
        return size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes

    # ensure we only skip the download if the file size also matches our expectation
    if already_present and size_matches():
        logger.info("[%s] already exists locally. Skipping download." % local_path)
        return False

    if not offline:
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
                announcement = "Downloading data from [%s] (%s MB) to [%s] ... " % (url, size_in_mb, local_path)
            else:
                announcement = "Downloading data from [%s] to [%s] ... " % (url, local_path)
            # ensure output appears immediately
            console.info(announcement, end='', flush=True, logger=logger)
            net.download(url, local_path, size_in_bytes)
            console.println("[OK]")
        except urllib.error.URLError:
            # not fatal on its own; the existence check below raises if we ended up without a file
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # file must exist at this point -> verify
    if not os.path.isfile(local_path):
        if offline:
            raise exceptions.SystemSetupError(
                "Cannot find %s. Please disable offline mode and retry again." % local_path)
        raise exceptions.SystemSetupError(
            "Cannot download from %s to %s. Please verify that data are available at %s and "
            "check your internet connection." % (url, local_path, url))

    actual_size = os.path.getsize(local_path)
    if size_in_bytes is not None and actual_size != size_in_bytes:
        raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                   (local_path, actual_size, size_in_bytes))
    return True
def configure_logging(cfg):
    """
    Replace all handlers of the root logger by a single file or stdout handler and optionally set up the profile logger.

    Reads ``system`` / ``time.start``, ``system`` / ``logging.output`` and ``driver`` / ``profiling`` from ``cfg``.
    Timestamps are formatted with ``time.gmtime``.

    :param cfg: Config object.
    """
    start_time = rtime.to_iso8601(cfg.opts("system", "time.start"))
    logging_output = cfg.opts("system", "logging.output")
    profiling_enabled = cfg.opts("driver", "profiling")

    if logging_output != "file":
        root_handler = logging.StreamHandler(stream=sys.stdout)
    else:
        log_file = application_log_file_path(start_time)
        io.ensure_dir(os.path.dirname(log_file))
        console.info("Writing logs to %s" % log_file)
        # there is an old log file lying around -> backup
        if os.path.exists(log_file):
            backup_name = "%s-bak-%d.log" % (log_file, int(os.path.getctime(log_file)))
            os.rename(log_file, backup_name)
        root_handler = logging.FileHandler(filename=log_file, mode="a")

    root_handler.setLevel(logging.INFO)
    # NOTE(review): %(msecs)d is not zero-padded, so 7 ms are rendered as ",7" - confirm whether %(msecs)03d is intended
    formatter = logging.Formatter("%(asctime)s,%(msecs)d PID:%(process)d %(name)s %(levelname)s %(message)s",
                                  datefmt="%Y-%m-%d %H:%M:%S")
    formatter.converter = time.gmtime
    root_handler.setFormatter(formatter)

    # Remove all handlers associated with the root logger object so we can start over with an entirely fresh log configuration
    root_logger = logging.root
    for stale_handler in root_logger.handlers[:]:
        root_logger.removeHandler(stale_handler)
    root_logger.addHandler(root_handler)
    logging.getLogger("elasticsearch").setLevel(logging.WARNING)

    if not profiling_enabled:
        return

    profile_file = "%s/profile.log" % application_log_dir_path()
    io.ensure_dir(os.path.dirname(profile_file))
    console.info("Writing driver profiling data to %s" % profile_file)
    profile_handler = logging.FileHandler(filename=profile_file, encoding="UTF-8")
    profile_handler.setFormatter(formatter)

    profile_logger = logging.getLogger("rally.profile")
    profile_logger.setLevel(logging.INFO)
    profile_logger.addHandler(profile_handler)
def decompress(data_set_path, expected_size_in_bytes):
    """
    Decompress the data set at ``data_set_path`` unless an extracted file of the expected size already exists.

    The name of the extracted file is ``data_set_path`` without its extension (as determined by ``io.splitext``).

    :param data_set_path: Path of the compressed data set.
    :param expected_size_in_bytes: Expected size of the extracted file in bytes or ``None`` if it is unknown. With
                                   ``None`` an existing extracted file never counts as up to date, so the data
                                   set is always decompressed again.
    :return: A tuple ``(path of the extracted file, whether decompression actually happened)``.
    :raises exceptions.DataError: If the extracted file does not have the expected size.
    """
    # we assume that track data are always compressed and try to decompress them before running the benchmark
    basename, _ = io.splitext(data_set_path)
    decompressed = False
    if not os.path.isfile(basename) or os.path.getsize(basename) != expected_size_in_bytes:
        decompressed = True
        # The resulting size is taken from our own parameter. This function used to read it from a name
        # ("type") that is neither a parameter nor a local variable, i.e. it only worked with a matching
        # variable in an enclosing scope.
        # NOTE(review): assumes callers pass the uncompressed size as expected_size_in_bytes - confirm.
        if expected_size_in_bytes:
            console.info("Decompressing track data from [%s] to [%s] (resulting size: %.2f GB) ... " %
                         (data_set_path, basename, convert.bytes_to_gb(expected_size_in_bytes)),
                         end='', flush=True, logger=logger)
        else:
            console.info("Decompressing track data from [%s] to [%s] ... " % (data_set_path, basename), end='',
                         flush=True, logger=logger)
        io.decompress(data_set_path, io.dirname(data_set_path))
        console.println("[OK]")
        extracted_bytes = os.path.getsize(basename)
        if expected_size_in_bytes is not None and extracted_bytes != expected_size_in_bytes:
            raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                       (basename, extracted_bytes, expected_size_in_bytes))
    return basename, decompressed
def setup(self, msg):
    """
    Prepare a race: create the mechanic actor, load track and challenge, create the stores and ask the mechanic to
    start the engine.

    :param msg: Setup message providing ``cfg``, ``sources``, ``build``, ``distribution``, ``external`` and ``docker``.
    :raises exceptions.SystemSetupError: If no distribution version can be derived or the track does not provide
                                         the requested challenge.
    """
    self.mechanic = self.createActor(mechanic.MechanicActor,
                                     #globalName="/rally/mechanic/coordinator",
                                     targetActorRequirements={"coordinator": True})
    self.cfg = msg.cfg
    cfg = self.cfg

    # To load the track we need to know the correct cluster distribution version. Usually, this value should be set
    # but there are rare cases (external pipeline and user did not specify the distribution version) where we need
    # to derive it ourselves. For source builds we always assume "master".
    version_is_missing = not msg.sources and not cfg.exists("mechanic", "distribution.version")
    if version_is_missing:
        distribution_version = mechanic.cluster_distribution_version(cfg)
        if not distribution_version:
            raise exceptions.SystemSetupError("A distribution version is required. Please specify it with --distribution-version.")
        logger.info("Automatically derived distribution version [%s]" % distribution_version)
        cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)

    current_track = track.load_track(cfg)
    challenge_name = cfg.opts("track", "challenge.name")
    challenge = current_track.find_challenge_or_default(challenge_name)
    if challenge is None:
        raise exceptions.SystemSetupError("Track [%s] does not provide challenge [%s]. List the available tracks with %s list tracks."
                                          % (current_track.name, challenge_name, PROGRAM_NAME))
    if challenge.user_info:
        console.info(challenge.user_info, logger=logger)

    self.race = metrics.create_race(cfg, current_track, challenge)
    self.metrics_store = metrics.metrics_store(cfg,
                                               track=self.race.track_name,
                                               challenge=self.race.challenge_name,
                                               read_only=False)
    self.lap_counter = LapCounter(self.race, self.metrics_store, cfg)
    self.race_store = metrics.race_store(cfg)

    logger.info("Asking mechanic to start the engine.")
    cluster_settings = self.race.challenge.cluster_settings
    start_engine = mechanic.StartEngine(cfg, self.metrics_store.open_context, cluster_settings, msg.sources, msg.build,
                                        msg.distribution, msg.external, msg.docker)
    self.send(self.mechanic, start_engine)
def receiveMessage(self, msg, sender):
    """
    Dispatch an incoming actor message based on its type.

    Unknown message types are logged and ignored. Any exception raised while handling a message sets
    ``self.error`` and is reported to ``self.start_sender`` as ``actor.BenchmarkFailure``.

    :param msg: The received message.
    :param sender: The sender of the message.
    """
    try:
        logger.info("BenchmarkActor#receiveMessage(msg = [%s] sender = [%s])" % (str(type(msg)), str(sender)))
        if isinstance(msg, Setup):
            # remember who asked us to start so we can report the outcome later
            self.start_sender = sender
            self.setup(msg)
        elif isinstance(msg, mechanic.EngineStarted):
            logger.info("Mechanic has started engine successfully.")
            self.metrics_store.meta_info = msg.system_meta_info
            cluster = msg.cluster_meta_info
            self.race.cluster = cluster
            console.info("Racing on track [%s], challenge [%s] and car %s\n" % (self.race.track_name, self.race.challenge_name, self.race.car))
            # start running we assume that each race has at least one lap
            self.run()
        elif isinstance(msg, driver.TaskFinished):
            logger.info("Task has finished.")
            logger.info("Bulk adding request metrics to metrics store.")
            self.metrics_store.bulk_add(msg.metrics)
            # We choose *NOT* to reset our own metrics store's timer as this one is only used to collect complete metrics records from
            # other stores (used by driver and mechanic). Hence there is no need to reset the timer in our own metrics store.
            self.send(self.mechanic, mechanic.ResetRelativeTime(msg.next_task_scheduled_in))
        elif isinstance(msg, actor.BenchmarkCancelled):
            self.cancelled = True
            # even notify the start sender if it is the originator. The reason is that we call #ask() which waits for a reply.
            # We also need to ask in order to avoid races between this notification and the following ActorExitRequest.
            self.send(self.start_sender, msg)
        elif isinstance(msg, actor.BenchmarkFailure):
            logger.info("Received a benchmark failure from [%s] and will forward it now." % sender)
            self.error = True
            self.send(self.start_sender, msg)
        elif isinstance(msg, driver.BenchmarkComplete):
            logger.info("Benchmark is complete.")
            logger.info("Bulk adding request metrics to metrics store.")
            self.metrics_store.bulk_add(msg.metrics)
            # the driver is not needed any more; ask it to exit and tell the mechanic that the benchmark has stopped
            self.send(self.main_driver, thespian.actors.ActorExitRequest())
            self.main_driver = None
            self.send(self.mechanic, mechanic.OnBenchmarkStop())
        elif isinstance(msg, mechanic.BenchmarkStopped):
            logger.info("Bulk adding system metrics to metrics store.")
            self.metrics_store.bulk_add(msg.system_metrics)
            logger.info("Flushing metrics data...")
            self.metrics_store.flush()
            logger.info("Flushing done")
            self.lap_counter.after_lap()
            # either start the next lap or finish the race
            if self.lap_counter.has_more_laps():
                self.run()
            else:
                self.teardown()
        elif isinstance(msg, mechanic.EngineStopped):
            logger.info("Mechanic has stopped engine successfully.")
            logger.info("Bulk adding system metrics to metrics store.")
            self.metrics_store.bulk_add(msg.system_metrics)
            self.metrics_store.flush()
            # results are only calculated, reported and stored for races that were neither cancelled nor failed
            if not self.cancelled and not self.error:
                final_results = reporter.calculate_results(self.metrics_store, self.race)
                self.race.add_final_results(final_results)
                reporter.summarize(self.race, self.cfg)
                self.race_store.store_race(self.race)
            else:
                logger.info("Suppressing output of summary report. Cancelled = [%r], Error = [%r]." % (self.cancelled, self.error))
            self.metrics_store.close()
            self.send(self.start_sender, Success())
        elif isinstance(msg, thespian.actors.ActorExitRequest):
            # forward the exit request to the actors we still hold a reference to
            if self.mechanic:
                self.send(self.mechanic, msg)
                self.mechanic = None
            if self.main_driver:
                self.send(self.main_driver, msg)
                self.main_driver = None
        else:
            logger.info("BenchmarkActor received unknown message [%s] (ignoring)." % (str(msg)))
    except BaseException as e:
        self.error = True
        logger.exception("BenchmarkActor encountered a fatal exception. Shutting down.")
        self.send(self.start_sender, actor.BenchmarkFailure("Could not execute benchmark", e))
def main():
    """
    Command line entry point.

    Parses the command line arguments, transfers them into the config, configures logging and networking,
    kills lingering Rally processes and dispatches the sub-command. Exits with status 64 if the sub-command
    reports a failure.
    """
    check_python_version()

    start = time.time()

    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)
    # allow to see a thread-dump on SIGQUIT
    faulthandler.register(signal.SIGQUIT, file=sys.stderr)

    pre_configure_logging()
    args = parse_args()

    # now that we know the user's choice we can initialize the console again
    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    # use the start date provided by the user or fall back to the current time (UTC)
    if args.effective_start_date:
        cfg.add(config.Scope.application, "system", "time.start", args.effective_start_date)
        cfg.add(config.Scope.application, "system", "time.start.user_provided", True)
    else:
        cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.utcnow())
        cfg.add(config.Scope.application, "system", "time.start.user_provided", False)

    cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)

    # per node?
    cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)
    cfg.add(config.Scope.applicationOverride, "system", "logging.output", args.logging)

    # Local config per node
    cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
    cfg.add(config.Scope.application, "node", "rally.cwd", os.getcwd())

    cfg.add(config.Scope.applicationOverride, "mechanic", "source.revision", args.revision)
    # TODO dm: Consider renaming this one. It's used by different modules
    if args.distribution_version:
        cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.version", args.distribution_version)
    cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.repository", args.distribution_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "repository.name", args.team_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.names", csv_to_list(args.car))
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.plugins", csv_to_list(args.elasticsearch_plugins))
    cfg.add(config.Scope.applicationOverride, "mechanic", "node.datapaths", csv_to_list(args.data_paths))
    if args.keep_cluster_running:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", True)
        # force-preserve the cluster nodes.
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", True)
    else:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", False)
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", convert.to_bool(args.preserve_install))
    cfg.add(config.Scope.applicationOverride, "mechanic", "telemetry.devices", csv_to_list(args.telemetry))

    cfg.add(config.Scope.applicationOverride, "race", "pipeline", args.pipeline)
    cfg.add(config.Scope.applicationOverride, "race", "laps", args.laps)
    cfg.add(config.Scope.applicationOverride, "race", "user.tag", args.user_tag)

    cfg.add(config.Scope.applicationOverride, "track", "repository.name", args.track_repository)
    cfg.add(config.Scope.applicationOverride, "track", "track.name", args.track)
    cfg.add(config.Scope.applicationOverride, "track", "challenge.name", args.challenge)
    cfg.add(config.Scope.applicationOverride, "track", "include.tasks", csv_to_list(args.include_tasks))
    cfg.add(config.Scope.applicationOverride, "track", "test.mode.enabled", args.test_mode)
    cfg.add(config.Scope.applicationOverride, "track", "auto_manage_indices", to_bool(args.auto_manage_indices))

    cfg.add(config.Scope.applicationOverride, "reporting", "format", args.report_format)
    cfg.add(config.Scope.applicationOverride, "reporting", "output.path", args.report_file)
    if sub_command == "compare":
        cfg.add(config.Scope.applicationOverride, "reporting", "baseline.timestamp", args.baseline)
        cfg.add(config.Scope.applicationOverride, "reporting", "contender.timestamp", args.contender)

    ################################
    # new section name: driver
    ################################
    cfg.add(config.Scope.applicationOverride, "driver", "cluster.health", args.cluster_health)
    cfg.add(config.Scope.applicationOverride, "driver", "profiling", args.enable_driver_profiling)
    cfg.add(config.Scope.applicationOverride, "driver", "load_driver_hosts", csv_to_list(args.load_driver_hosts))
    if sub_command != "list":
        # Also needed by mechanic (-> telemetry) - duplicate by module?
        cfg.add(config.Scope.applicationOverride, "client", "hosts", _normalize_hosts(csv_to_list(args.target_hosts)))
        client_options = kv_to_map(csv_to_list(args.client_options))
        cfg.add(config.Scope.applicationOverride, "client", "options", client_options)
        if "timeout" not in client_options:
            console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")

    # split by component?
    if sub_command == "list":
        cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration)
        cfg.add(config.Scope.applicationOverride, "system", "list.races.max_results", args.limit)

    configure_logging(cfg)
    logger.info("OS [%s]" % str(os.uname()))
    logger.info("Python [%s]" % str(sys.implementation))
    logger.info("Rally version [%s]" % version.version())
    logger.info("Command line arguments: %s" % args)
    # Configure networking
    net.init()
    if not args.offline:
        # switch to offline mode automatically if there is no Internet connection
        if not net.has_internet_connection():
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)
        else:
            logger.info("Detected a working Internet connection.")

    # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
    # noinspection PyBroadException
    try:
        process.kill_running_rally_instances()
    except BaseException:
        logger.exception("Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")

    success = dispatch_sub_command(cfg, sub_command)

    end = time.time()
    if success:
        console.println("")
        console.info("SUCCESS (took %d seconds)" % (end - start), overline="-", underline="-")
    else:
        console.println("")
        console.info("FAILURE (took %d seconds)" % (end - start), overline="-", underline="-")
        # signal the failure to the calling process
        sys.exit(64)
def main():
    """
    Command line entry point.

    Parses the command line arguments, transfers them into the config, configures logging and networking,
    kills lingering Rally processes, bootstraps the actor system and dispatches the sub-command. The actor
    system is shut down afterwards in any case; the user may interrupt the shutdown at most twice. Exits with
    status 64 if the sub-command did not succeed.
    """
    start = time.time()
    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)

    pre_configure_logging()
    args = parse_args()

    # now that we know the user's choice we can initialize the console again
    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    # Add global meta info derived by rally itself
    cfg.add(config.Scope.application, "meta", "time.start", args.effective_start_date)
    cfg.add(config.Scope.application, "system", "rally.root", rally_root_path())
    cfg.add(config.Scope.application, "system", "rally.cwd", os.getcwd())
    cfg.add(config.Scope.application, "system", "invocation.root.dir", paths.Paths(cfg).invocation_root())
    # Add command line config
    cfg.add(config.Scope.applicationOverride, "source", "revision", args.revision)
    cfg.add(config.Scope.applicationOverride, "source", "distribution.version", args.distribution_version)
    cfg.add(config.Scope.applicationOverride, "source", "distribution.repository", args.distribution_repository)
    cfg.add(config.Scope.applicationOverride, "system", "pipeline", args.pipeline)
    cfg.add(config.Scope.applicationOverride, "system", "track.repository", args.track_repository)
    cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)
    cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)
    cfg.add(config.Scope.applicationOverride, "system", "user.tag", args.user_tag)
    cfg.add(config.Scope.applicationOverride, "system", "logging.output", args.logging)
    cfg.add(config.Scope.applicationOverride, "telemetry", "devices", csv_to_list(args.telemetry))
    cfg.add(config.Scope.applicationOverride, "benchmarks", "track", args.track)
    cfg.add(config.Scope.applicationOverride, "benchmarks", "challenge", args.challenge)
    cfg.add(config.Scope.applicationOverride, "benchmarks", "car", args.car)
    cfg.add(config.Scope.applicationOverride, "benchmarks", "cluster.health", args.cluster_health)
    cfg.add(config.Scope.applicationOverride, "benchmarks", "laps", args.laps)
    cfg.add(config.Scope.applicationOverride, "benchmarks", "test.mode", args.test_mode)
    cfg.add(config.Scope.applicationOverride, "provisioning", "datapaths", csv_to_list(args.data_paths))
    cfg.add(config.Scope.applicationOverride, "provisioning", "install.preserve", convert.to_bool(args.preserve_install))
    cfg.add(config.Scope.applicationOverride, "launcher", "external.target.hosts", convert_hosts(csv_to_list(args.target_hosts)))
    cfg.add(config.Scope.applicationOverride, "launcher", "client.options", kv_to_map(csv_to_list(args.client_options)))
    cfg.add(config.Scope.applicationOverride, "report", "reportformat", args.report_format)
    cfg.add(config.Scope.applicationOverride, "report", "reportfile", args.report_file)
    if args.override_src_dir is not None:
        cfg.add(config.Scope.applicationOverride, "source", "local.src.dir", args.override_src_dir)

    if sub_command == "list":
        cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration)
        cfg.add(config.Scope.applicationOverride, "system", "list.races.max_results", args.limit)
    if sub_command == "compare":
        cfg.add(config.Scope.applicationOverride, "report", "comparison.baseline.timestamp", args.baseline)
        cfg.add(config.Scope.applicationOverride, "report", "comparison.contender.timestamp", args.contender)

    configure_logging(cfg)
    logger.info("Rally version [%s]" % version())
    logger.info("Command line arguments: %s" % args)
    # Configure networking
    net.init()
    if not args.offline:
        # switch to offline mode automatically if there is no Internet connection
        if not net.has_internet_connection():
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)
        else:
            logger.info("Detected a working Internet connection.")

    # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
    # noinspection PyBroadException
    try:
        process.kill_running_rally_instances()
    except BaseException:
        logger.exception("Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")

    try:
        actors = bootstrap_actor_system(cfg)
    except RuntimeError as e:
        logger.exception("Could not bootstrap actor system.")
        # retry with a different system base if (and only if) no socket address could be determined
        if str(e) == "Unable to determine valid external socket address.":
            console.warn("Could not determine a socket address. Are you running without any network?", logger=logger)
            actors = bootstrap_actor_system(cfg, system_base="multiprocQueueBase")
        else:
            raise

    success = False
    try:
        success = dispatch_sub_command(cfg, sub_command)
    finally:
        # always attempt to shut down the actor system; we tolerate at most two interruptions by the user
        shutdown_complete = False
        times_interrupted = 0
        while not shutdown_complete and times_interrupted < 2:
            try:
                logger.info("Attempting to shutdown internal actor system.")
                actors.shutdown()
                shutdown_complete = True
                logger.info("Shutdown completed.")
            except KeyboardInterrupt:
                times_interrupted += 1
                # Logger.warn() is a deprecated alias, hence we use Logger.warning()
                logger.warning("User interrupted shutdown of internal actor system.")
                console.info("Please wait a moment for Rally's internal components to shutdown.")
        if not shutdown_complete and times_interrupted > 0:
            logger.warning("Terminating after user has interrupted actor system shutdown explicitly for [%d] times." % times_interrupted)
            console.println("")
            console.warn("Terminating now at the risk of leaving child processes behind.")
            console.println("")
            console.warn("The next race may fail due to an unclean shutdown.")
            console.println("")
            console.println(SKULL)
            console.println("")

    end = time.time()
    if success:
        console.println("")
        console.info("SUCCESS (took %d seconds)" % (end - start), overline="-", underline="-")
    else:
        console.println("")
        console.info("FAILURE (took %d seconds)" % (end - start), overline="-", underline="-")
        # signal the failure to the calling process
        sys.exit(64)