def create(cfg, metrics_store, node_ip, node_http_port, all_node_ips, all_node_ids,
           sources=False, distribution=False, external=False, docker=False):
    race_root_path = paths.race_root(cfg)
    node_ids = cfg.opts("provisioning", "node.ids", mandatory=False)
    node_name_prefix = cfg.opts("provisioning", "node.name.prefix")
    car, plugins = load_team(cfg, external)

    if sources or distribution:
        s = supplier.create(cfg, sources, distribution, car, plugins)
        p = []
        all_node_names = ["%s-%s" % (node_name_prefix, n) for n in all_node_ids]
        for node_id in node_ids:
            node_name = "%s-%s" % (node_name_prefix, node_id)
            p.append(
                provisioner.local(cfg, car, plugins, node_ip, node_http_port, all_node_ips,
                                  all_node_names, race_root_path, node_name))
        l = launcher.ProcessLauncher(cfg)
    elif external:
        raise exceptions.RallyAssertionError("Externally provisioned clusters should not need to be managed by Rally's mechanic")
    elif docker:
        if len(plugins) > 0:
            raise exceptions.SystemSetupError("You cannot specify any plugins for Docker clusters. Please remove "
                                              "\"--elasticsearch-plugins\" and try again.")
        s = lambda: None
        p = []
        for node_id in node_ids:
            node_name = "%s-%s" % (node_name_prefix, node_id)
            p.append(provisioner.docker(cfg, car, node_ip, node_http_port, race_root_path, node_name))
        l = launcher.DockerLauncher(cfg)
    else:
        # It is a programmer error (and not a user error) if this function is called with wrong parameters
        raise RuntimeError("One of sources, distribution, docker or external must be True")

    return Mechanic(cfg, metrics_store, s, p, l)

def test_daemon_start_stop(self, wait_for_pidfile, chdir, get_size, supports, java_home, kill):
    cfg = config.Config()
    cfg.add(config.Scope.application, "node", "root.dir", "test")
    cfg.add(config.Scope.application, "mechanic", "keep.running", False)
    cfg.add(config.Scope.application, "mechanic", "telemetry.devices", [])
    cfg.add(config.Scope.application, "mechanic", "telemetry.params", None)
    cfg.add(config.Scope.application, "system", "env.name", "test")

    ms = get_metrics_store(cfg)
    proc_launcher = launcher.ProcessLauncher(cfg, ms, paths.races_root(cfg))

    node_config = NodeConfiguration(car=Car("default", root_path=None, config_paths=[]),
                                    ip="127.0.0.1",
                                    node_name="testnode",
                                    node_root_path="/tmp",
                                    binary_path="/tmp",
                                    log_path="/tmp",
                                    data_paths="/tmp")

    nodes = proc_launcher.start([node_config])
    self.assertEqual(len(nodes), 1)
    self.assertEqual(nodes[0].pid, MOCK_PID_VALUE)

    proc_launcher.stop(nodes)
    self.assertTrue(kill.called)

def test_env_options_order(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "mechanic", "keep.running", False)
    cfg.add(config.Scope.application, "system", "env.name", "test")
    ms = get_metrics_store(cfg)
    proc_launcher = launcher.ProcessLauncher(cfg, ms, races_root_dir="/home")

    default_car = team.Car(names="default-car", root_path=None, config_paths=["/tmp/rally-config"])

    node_telemetry = [
        telemetry.FlightRecorder(telemetry_params={}, log_root="/tmp/telemetry", java_major_version=8)
    ]
    t = telemetry.Telemetry(["jfr"], devices=node_telemetry)
    env = proc_launcher._prepare_env(car=default_car, node_name="node0", java_home="/java_home", t=t)

    self.assertEqual("/java_home/bin" + os.pathsep + os.environ["PATH"], env["PATH"])
    self.assertEqual(
        "-XX:+ExitOnOutOfMemoryError -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints "
        "-XX:+UnlockCommercialFeatures -XX:+FlightRecorder "
        "-XX:FlightRecorderOptions=disk=true,maxage=0s,maxsize=0,dumponexit=true,dumponexitpath=/tmp/telemetry/default-car-node0.jfr "
        "-XX:StartFlightRecording=defaultrecording=true",
        env["ES_JAVA_OPTS"])

def test_daemon_start_stop(self, wait_for_pidfile, chdir, get_size, supports, java_home):
    cfg = config.Config()
    cfg.add(config.Scope.application, "node", "root.dir", "test")
    cfg.add(config.Scope.application, "mechanic", "keep.running", False)
    cfg.add(config.Scope.application, "telemetry", "devices", [])
    cfg.add(config.Scope.application, "telemetry", "params", None)
    cfg.add(config.Scope.application, "system", "env.name", "test")

    ms = get_metrics_store(cfg)
    proc_launcher = launcher.ProcessLauncher(cfg)

    node_configs = []
    for node in range(2):
        node_configs.append(NodeConfiguration(build_type="tar",
                                              car_env={},
                                              car_runtime_jdks="12,11",
                                              ip="127.0.0.1",
                                              node_name="testnode-{}".format(node),
                                              node_root_path="/tmp",
                                              binary_path="/tmp",
                                              data_paths="/tmp"))

    nodes = proc_launcher.start(node_configs)
    self.assertEqual(len(nodes), 2)
    self.assertEqual(nodes[0].pid, MOCK_PID_VALUE)

    stopped_nodes = proc_launcher.stop(nodes, ms)
    # all nodes should be stopped
    self.assertEqual(nodes, stopped_nodes)

def test_env_options_order(self, sleep):
    cfg = config.Config()
    cfg.add(config.Scope.application, "mechanic", "keep.running", False)
    cfg.add(config.Scope.application, "system", "env.name", "test")
    proc_launcher = launcher.ProcessLauncher(cfg)

    node_telemetry = [
        telemetry.FlightRecorder(telemetry_params={}, log_root="/tmp/telemetry", java_major_version=8)
    ]
    t = telemetry.Telemetry(["jfr"], devices=node_telemetry)
    env = proc_launcher._prepare_env(car_env={}, node_name="node0", java_home="/java_home", t=t)

    self.assertEqual("/java_home/bin" + os.pathsep + os.environ["PATH"], env["PATH"])
    self.assertEqual(
        "-XX:+ExitOnOutOfMemoryError -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints "
        "-XX:+UnlockCommercialFeatures -XX:+FlightRecorder "
        "-XX:FlightRecorderOptions=disk=true,maxage=0s,maxsize=0,dumponexit=true,dumponexitpath=/tmp/telemetry/profile.jfr "  # pylint: disable=line-too-long
        "-XX:StartFlightRecording=defaultrecording=true",
        env["ES_JAVA_OPTS"])

def test_daemon_start_stop(self, process, wait_for_pidfile, node_config, ms, cfg, chdir, supports, java_home, kill):
    proc_launcher = launcher.ProcessLauncher(cfg, ms, paths.races_root(cfg))

    nodes = proc_launcher.start([node_config])
    self.assertEqual(len(nodes), 1)
    self.assertEqual(nodes[0].pid, MOCK_PID_VALUE)

    proc_launcher.keep_running = False
    proc_launcher.stop(nodes)
    self.assertTrue(kill.called)

def test_bundled_jdk_not_in_path(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "mechanic", "keep.running", False)
    cfg.add(config.Scope.application, "system", "env.name", "test")
    proc_launcher = launcher.ProcessLauncher(cfg)

    t = telemetry.Telemetry()
    # no JAVA_HOME -> use the bundled JDK
    env = proc_launcher._prepare_env(car_env={}, node_name="node0", java_home=None, t=t)

    # unmodified
    self.assertEqual(os.environ["PATH"], env["PATH"])

def create(cfg, metrics_store, all_node_ips, cluster_settings=None,
           sources=False, build=False, distribution=False, external=False, docker=False):
    races_root = paths.races_root(cfg)
    challenge_root_path = paths.race_root(cfg)
    node_ids = cfg.opts("provisioning", "node.ids", mandatory=False)
    car, plugins = load_team(cfg, external)

    if sources or distribution:
        s = supplier.create(cfg, sources, distribution, build, challenge_root_path, car, plugins)
        p = []
        for node_id in node_ids:
            p.append(provisioner.local_provisioner(cfg, car, plugins, cluster_settings, all_node_ips,
                                                   challenge_root_path, node_id))
        l = launcher.ProcessLauncher(cfg, metrics_store, races_root)
    elif external:
        if len(plugins) > 0:
            raise exceptions.SystemSetupError("You cannot specify any plugins for externally provisioned clusters. Please remove "
                                              "\"--elasticsearch-plugins\" and try again.")
        s = lambda: None
        p = [provisioner.no_op_provisioner()]
        l = launcher.ExternalLauncher(cfg, metrics_store)
    elif docker:
        if len(plugins) > 0:
            raise exceptions.SystemSetupError("You cannot specify any plugins for Docker clusters. Please remove "
                                              "\"--elasticsearch-plugins\" and try again.")
        s = lambda: None
        p = []
        for node_id in node_ids:
            p.append(provisioner.docker_provisioner(cfg, car, cluster_settings, challenge_root_path, node_id))
        l = launcher.DockerLauncher(cfg, metrics_store)
    else:
        # It is a programmer error (and not a user error) if this function is called with wrong parameters
        raise RuntimeError("One of sources, distribution, docker or external must be True")

    return Mechanic(s, p, l)

def test_pass_java_opts(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "test")
    cfg.add(config.Scope.application, "system", "passenv", "ES_JAVA_OPTS")
    os.environ["ES_JAVA_OPTS"] = "-XX:-someJunk"

    proc_launcher = launcher.ProcessLauncher(cfg)

    t = telemetry.Telemetry()
    # no JAVA_HOME -> use the bundled JDK
    env = proc_launcher._prepare_env(node_name="node0", java_home=None, t=t)

    # unmodified
    assert env["ES_JAVA_OPTS"] == os.environ["ES_JAVA_OPTS"]

def test_bundled_jdk_not_in_path(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "test")
    os.environ["JAVA_HOME"] = "/path/to/java"

    proc_launcher = launcher.ProcessLauncher(cfg)

    t = telemetry.Telemetry()
    # no JAVA_HOME -> use the bundled JDK
    env = proc_launcher._prepare_env(node_name="node0", java_home=None, t=t)

    # unmodified
    assert env["PATH"] == os.environ["PATH"]
    assert env.get("JAVA_HOME") is None

def stop(cfg):
    root_path = paths.install_root(cfg)
    node_config = provisioner.load_node_configuration(root_path)

    if node_config.build_type == "tar":
        node_launcher = launcher.ProcessLauncher(cfg)
    elif node_config.build_type == "docker":
        node_launcher = launcher.DockerLauncher(cfg)
    else:
        raise exceptions.SystemSetupError("Unknown build type [{}]".format(node_config.build_type))

    nodes, race_id = _load_node_file(root_path)

    cls = metrics.metrics_store_class(cfg)
    metrics_store = cls(cfg)

    race_store = metrics.race_store(cfg)
    try:
        current_race = race_store.find_by_race_id(race_id)
    except exceptions.NotFound:
        logging.getLogger(__name__).info("Could not find race [%s] most likely because an in-memory metrics store is "
                                         "used across multiple machines. Use an Elasticsearch metrics store to persist "
                                         "results.", race_id)
        # we are assuming here that we use an Elasticsearch metrics store. If we use a file race store (across
        # multiple machines) we will not be able to retrieve a race. In that case we open our in-memory metrics store
        # with settings derived from startup parameters (because we can't store system metrics persistently anyway).
        current_race = metrics.create_race(cfg, track=None, challenge=None)

    metrics_store.open(race_id=current_race.race_id,
                       race_timestamp=current_race.race_timestamp,
                       track_name=current_race.track_name,
                       challenge_name=current_race.challenge_name)

    node_launcher.stop(nodes, metrics_store)
    _delete_node_file(root_path)

    metrics_store.flush(refresh=True)
    for node in nodes:
        results = metrics.calculate_system_results(metrics_store, node.node_name)
        current_race.add_results(results)
    metrics.results_store(cfg).store_results(current_race)
    metrics_store.close()

    # TODO: Do we need to expose this as a separate command as well?
    provisioner.cleanup(preserve=cfg.opts("mechanic", "preserve.install"),
                        install_dir=node_config.binary_path,
                        data_paths=node_config.data_paths)

def test_daemon_stop_with_already_terminated_process(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "node", "root.dir", "test")
    cfg.add(config.Scope.application, "telemetry", "devices", [])
    cfg.add(config.Scope.application, "telemetry", "params", None)
    cfg.add(config.Scope.application, "system", "env.name", "test")

    ms = get_metrics_store(cfg)
    proc_launcher = launcher.ProcessLauncher(cfg)

    nodes = [
        cluster.Node(pid=-1,
                     binary_path="/bin",
                     host_name="localhost",
                     node_name="rally-0",
                     telemetry=telemetry.Telemetry())
    ]

    stopped_nodes = proc_launcher.stop(nodes, ms)
    # no nodes should have been stopped (they were already stopped)
    assert stopped_nodes == []

def stop(cfg):
    root_path = paths.install_root(cfg)
    node_config = provisioner.load_node_configuration(root_path)

    if node_config.build_type == "tar":
        node_launcher = launcher.ProcessLauncher(cfg)
    elif node_config.build_type == "docker":
        node_launcher = launcher.DockerLauncher(cfg)
    else:
        raise exceptions.SystemSetupError("Unknown build type [{}]".format(node_config.build_type))

    nodes, race_id = _load_node_file(root_path)

    cls = metrics.metrics_store_class(cfg)
    metrics_store = cls(cfg)

    race_store = metrics.race_store(cfg)
    try:
        current_race = race_store.find_by_race_id(race_id)
        metrics_store.open(race_id=current_race.race_id,
                           race_timestamp=current_race.race_timestamp,
                           track_name=current_race.track_name,
                           challenge_name=current_race.challenge_name)
    except exceptions.NotFound:
        logging.getLogger(__name__).info("Could not find race [%s] and will thus not persist system metrics.", race_id)
        # Don't persist system metrics if we can't retrieve the race as we cannot derive the required meta-data.
        current_race = None
        metrics_store = None

    node_launcher.stop(nodes, metrics_store)
    _delete_node_file(root_path)

    if current_race:
        metrics_store.flush(refresh=True)
        for node in nodes:
            results = metrics.calculate_system_results(metrics_store, node.node_name)
            current_race.add_results(results)
        metrics.results_store(cfg).store_results(current_race)
        metrics_store.close()

    # TODO: Do we need to expose this as a separate command as well?
    provisioner.cleanup(preserve=cfg.opts("mechanic", "preserve.install"),
                        install_dir=node_config.binary_path,
                        data_paths=node_config.data_paths)

def test_pass_env_vars(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "test")
    cfg.add(config.Scope.application, "system", "passenv", "JAVA_HOME,FOO1")
    os.environ["JAVA_HOME"] = "/path/to/java"
    os.environ["FOO1"] = "BAR1"

    proc_launcher = launcher.ProcessLauncher(cfg)

    t = telemetry.Telemetry()
    # no JAVA_HOME -> use the bundled JDK
    env = proc_launcher._prepare_env(node_name="node0", java_home=None, t=t)

    # unmodified
    assert env["JAVA_HOME"] == os.environ["JAVA_HOME"]
    assert env["FOO1"] == os.environ["FOO1"]
    assert env["ES_JAVA_OPTS"] == "-XX:+ExitOnOutOfMemoryError"

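# The passenv-related tests above (test_pass_java_opts, test_bundled_jdk_not_in_path,
# test_pass_env_vars) all assert the same contract: _prepare_env builds the node
# environment from scratch, copies across only the variables named in the
# "system"/"passenv" setting, and otherwise falls back to a default ES_JAVA_OPTS of
# "-XX:+ExitOnOutOfMemoryError". The following is a minimal, standalone sketch of that
# allow-list behaviour, not Rally's actual implementation; the helper name filter_env
# is made up for illustration.
import os


def filter_env(passenv, defaults=None):
    # Copy only the allow-listed variables from os.environ into a fresh dict,
    # then fill in defaults for anything the caller did not pass through.
    env = {key: os.environ[key] for key in passenv.split(",") if key in os.environ}
    for key, value in (defaults or {}).items():
        env.setdefault(key, value)
    return env


if __name__ == "__main__":
    os.environ["FOO1"] = "BAR1"
    env = filter_env("JAVA_HOME,FOO1", defaults={"ES_JAVA_OPTS": "-XX:+ExitOnOutOfMemoryError"})
    assert env["FOO1"] == "BAR1"
    # ES_JAVA_OPTS was not allow-listed, so the default applies
    assert env["ES_JAVA_OPTS"] == "-XX:+ExitOnOutOfMemoryError"
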
def start(cfg):
    root_path = paths.install_root(cfg)
    race_id = cfg.opts("system", "race.id")

    # avoid double-launching - we expect that the node file is absent
    with contextlib.suppress(FileNotFoundError):
        _load_node_file(root_path)
        install_id = cfg.opts("system", "install.id")
        raise exceptions.SystemSetupError("A node with this installation id is already running. Please stop it first "
                                          "with {} stop --installation-id={}".format(PROGRAM_NAME, install_id))

    node_config = provisioner.load_node_configuration(root_path)

    if node_config.build_type == "tar":
        node_launcher = launcher.ProcessLauncher(cfg)
    elif node_config.build_type == "docker":
        node_launcher = launcher.DockerLauncher(cfg)
    else:
        raise exceptions.SystemSetupError("Unknown build type [{}]".format(node_config.build_type))

    nodes = node_launcher.start([node_config])
    _store_node_file(root_path, (nodes, race_id))
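
# The start()/stop() commands above coordinate through a small "node file" under the
# install root: start() persists (nodes, race_id) after launching, and stop() reloads
# it, stops the processes, and deletes it; a missing file signals that nothing is
# running. The sketch below shows that handshake under the assumption of a simple
# pickle-based store; Rally's actual _store_node_file/_load_node_file/_delete_node_file
# helpers may be implemented differently.
import os
import pickle


def _store_node_file(root_path, data):
    # persist (nodes, race_id) so a later `stop` invocation can find the running nodes
    with open(os.path.join(root_path, "node"), "wb") as f:
        pickle.dump(data, f)


def _load_node_file(root_path):
    # raises FileNotFoundError if no node has been started for this installation
    with open(os.path.join(root_path, "node"), "rb") as f:
        return pickle.load(f)


def _delete_node_file(root_path):
    # called by stop() once the nodes have been shut down
    os.remove(os.path.join(root_path, "node"))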