def terminate_node(self, node_id):
    """Stop or terminate a single node and forget its cached tags.

    When ``self.cache_stopped_nodes`` is truthy, a node with a spot
    instance request id is terminated and any other node is stopped.
    When it is falsy the node is always terminated. In every case the
    node's entries are removed from ``self.tag_cache`` and
    ``self.tag_cache_pending``.

    Args:
        node_id: Identifier handed to ``self._get_cached_node``.
    """
    node = self._get_cached_node(node_id)

    if self.cache_stopped_nodes:
        if node.spot_instance_request_id:
            cli_logger.print(
                "Terminating instance {} " +
                cf.dimmed("(cannot stop spot instances, only terminate)"),
                node_id)  # todo: show node name?

            cli_logger.old_info(
                logger,
                "AWSNodeProvider: terminating node {} (spot nodes cannot "
                "be stopped, only terminated)", node_id)
            node.terminate()
        else:
            cli_logger.print(
                "Stopping instance {} " + cf.dimmed(
                    "(to terminate instead, "
                    "set `cache_stopped_nodes: False` "
                    "under `provider` in the cluster configuration)"),
                node_id)  # todo: show node name?

            # Pass node_id as a format argument, like the sibling
            # old_info calls, instead of pre-formatting the message and
            # handing the already-rendered text to the logger.
            cli_logger.old_info(
                logger,
                "AWSNodeProvider: stopping node {}. To terminate nodes "
                "on stop, set 'cache_stopped_nodes: False' in the "
                "provider config.", node_id)
            node.stop()
    else:
        node.terminate()

    # Drop cached tags whether the node was stopped or terminated.
    self.tag_cache.pop(node_id, None)
    self.tag_cache_pending.pop(node_id, None)
def remaining_nodes():
    """Return the nodes that are still up and should be shut down.

    Reads ``provider``, ``config``, ``keep_min_workers`` and
    ``workers_only`` from the enclosing scope.

    Returns:
        The selected worker nodes, preceded by the head node(s) unless
        ``workers_only`` is set. With ``keep_min_workers``, a random
        subset of workers is returned so that ``min_workers`` of them
        are left out.
    """
    workers = provider.non_terminated_nodes(
        {TAG_RAY_NODE_KIND: NODE_KIND_WORKER})

    if keep_min_workers:
        min_workers = config.get("min_workers", 0)

        cli_logger.print(
            "{} random worker nodes will not be shut down. " +
            cf.dimmed("(due to {})"), cf.bold(min_workers),
            cf.bold("--keep-min-workers"))
        cli_logger.old_info(logger,
                            "teardown_cluster: Keeping {} nodes...",
                            min_workers)

        # Clamp at zero: with fewer live workers than `min_workers` the
        # difference is negative and random.sample raises ValueError.
        # In that case every worker is kept.
        workers = random.sample(workers,
                                max(0, len(workers) - min_workers))

    # todo: it's weird to kill the head node but not all workers
    if workers_only:
        cli_logger.print(
            "The head node will not be shut down. " +
            cf.dimmed("(due to {})"), cf.bold("--workers-only"))

        return workers

    head = provider.non_terminated_nodes(
        {TAG_RAY_NODE_KIND: NODE_KIND_HEAD})

    return head + workers
def show():
    """
    Write a sample of every modifier and color to stdout
    """
    modifier_names = ('bold', 'dimmed', 'italic', 'underlined',
                      'inversed', 'concealed', 'struckthrough')
    color_names = ('red', 'green', 'yellow', 'blue',
                   'magenta', 'cyan', 'white')

    # One entry per output row; each cell is
    # (attribute looked up on ``colorful``, text passed to it).
    rows = (
        # modifiers
        [(name, name) for name in modifier_names],
        # foreground colors
        [(name, name) for name in color_names],
        # background colors
        [('on_' + name, name) for name in color_names],
    )

    for row in rows:
        final_index = len(row) - 1
        for index, (style_name, text) in enumerate(row):
            # cells are separated by a space, rows end with a newline
            terminator = '\n' if index == final_index else ' '
            sys.stdout.write(getattr(colorful, style_name)(text) + terminator)
def terminate_nodes(self, node_ids):
    """Stop or terminate a batch of nodes and forget their cached tags.

    Does nothing for an empty ``node_ids``. Without
    ``self.cache_stopped_nodes`` all nodes are terminated in one call.
    With it, nodes that have a spot instance request id are terminated
    and the remaining ones are stopped.

    Args:
        node_ids: Identifiers of the nodes to shut down.
    """
    if not node_ids:
        return

    if not self.cache_stopped_nodes:
        self.ec2.meta.client.terminate_instances(InstanceIds=node_ids)
    else:
        # Split the ids by whether the instance can be stopped at all.
        stoppable_ids, spot_ids = [], []
        for instance_id in node_ids:
            cached = self._get_cached_node(instance_id)
            if cached.spot_instance_request_id:
                spot_ids.append(instance_id)
            else:
                stoppable_ids.append(instance_id)

        if stoppable_ids:
            # todo: show node names?
            stop_hint = cf.dimmed(
                "(to terminate instead, "
                "set `cache_stopped_nodes: False` "
                "under `provider` in the cluster configuration)")
            cli_logger.print("Stopping instances {} " + stop_hint,
                             cli_logger.render_list(stoppable_ids))
            cli_logger.old_info(
                logger,
                "AWSNodeProvider: stopping nodes {}. To terminate nodes "
                "on stop, set 'cache_stopped_nodes: False' in the "
                "provider config.", stoppable_ids)

            self.ec2.meta.client.stop_instances(InstanceIds=stoppable_ids)

        if spot_ids:
            spot_hint = cf.dimmed(
                "(cannot stop spot instances, only terminate)")
            cli_logger.print("Terminating instances {} " + spot_hint,
                             cli_logger.render_list(spot_ids))
            cli_logger.old_info(
                logger,
                "AWSNodeProvider: terminating nodes {} (spot nodes cannot "
                "be stopped, only terminated)", spot_ids)

            self.ec2.meta.client.terminate_instances(InstanceIds=spot_ids)

    # Cached tags are dropped for every requested node.
    for instance_id in node_ids:
        self.tag_cache.pop(instance_id, None)
        self.tag_cache_pending.pop(instance_id, None)
def handle_cli_override(key, override): if override is not None: if key in config: nonlocal printed_overrides printed_overrides = True cli_logger.warning( "`{}` override provided on the command line.\n" " Using " + cf.bold("{}") + cf.dimmed(" [configuration file has " + cf.bold("{}") + "]"), key, override, config[key]) config[key] = override
def wait_ready(self, deadline):
    """Poll the node with ``uptime`` until a remote shell is available.

    The command is retried every ``READY_CHECK_INTERVAL`` seconds while
    ``time.time()`` is below ``deadline`` and the provider does not
    report the node as terminated.

    Args:
        deadline: Timestamp (compared against ``time.time()``) after
            which polling stops.

    Returns:
        ``True`` as soon as the ``uptime`` command succeeds.

    Raises:
        AssertionError: If polling ends without a successful command.
    """
    with cli_logger.group(
            "Waiting for SSH to become available", _numbered=("[]", 1, 6)):
        with LogTimer(self.log_prefix + "Got remote shell"):
            cli_logger.old_info(logger, "{}Waiting for remote shell...",
                                self.log_prefix)

            cli_logger.print("Running `{}` as a test.", cf.bold("uptime"))
            first_conn_refused_time = None
            while time.time() < deadline and \
                    not self.provider.is_terminated(self.node_id):
                try:
                    cli_logger.old_debug(logger,
                                         "{}Waiting for remote shell...",
                                         self.log_prefix)

                    # Run outside of the container
                    self.cmd_runner.run("uptime", run_env="host")
                    cli_logger.old_debug(logger, "Uptime succeeded.")
                    cli_logger.success("Success.")
                    return True
                except ProcessRunnerError as e:
                    first_conn_refused_time = \
                        cmd_output_util.handle_ssh_fails(
                            e, first_conn_refused_time,
                            retry_interval=READY_CHECK_INTERVAL)
                    time.sleep(READY_CHECK_INTERVAL)
                except Exception as e:
                    # TODO(maximsmol): we should not be ignoring
                    # exceptions if they get filtered properly
                    # (new style log + non-interactive shells)
                    #
                    # however threading this configuration state
                    # is a pain and I'm leaving it for later

                    retry_str = str(e)
                    if hasattr(e, "cmd"):
                        retry_str = "(Exit Status {}): {}".format(
                            e.returncode, " ".join(e.cmd))

                    cli_logger.print(
                        "SSH still not available {}, "
                        "retrying in {} seconds.", cf.dimmed(retry_str),
                        cf.bold(str(READY_CHECK_INTERVAL)))
                    cli_logger.old_debug(logger,
                                         "{}Node not up, retrying: {}",
                                         self.log_prefix, retry_str)
                    time.sleep(READY_CHECK_INTERVAL)

    # Raised explicitly instead of `assert False, ...`: assert statements
    # are removed under `python -O`, which would turn a timeout into a
    # silent `None` return. The exception type is unchanged.
    raise AssertionError("Unable to connect to node")
def run_cmd_redirected(cmd, silent=False, use_login_shells=False):
    """Run a command and optionally redirect output to a file.

    Args:
        cmd (List[str]): Command to run.
        silent (bool): If true, the command output will be silenced completely
                       (redirected to /dev/null), unless verbose logging
                       is enabled. Use this for running utility commands like
                       rsync.
        use_login_shells (bool): Forwarded unchanged to
                       `_run_and_process_output`.

    Returns:
        Whatever `_run_and_process_output` returns.
    """
    if silent and cli_logger.verbosity < 1:
        return _run_and_process_output(
            cmd, stdout_file=None, use_login_shells=use_login_shells)

    if not is_output_redirected():
        return _run_and_process_output(
            cmd, stdout_file=sys.stdout, use_login_shells=use_login_shells)

    # Only the last path component of the executable goes into the file
    # name: a command given with a path (e.g. "/usr/bin/ssh") would
    # otherwise point the temp file into a non-existent directory and
    # make open() fail.
    tmpfile_path = os.path.join(
        tempfile.gettempdir(), "ray-up-{}-{}.txt".format(
            os.path.basename(cmd[0]), time.time()))
    with open(
            tmpfile_path,
            mode="w",
            # line buffering
            buffering=1) as tmp:
        cli_logger.verbose("Command stdout is redirected to {}",
                           cf.bold(tmp.name))
        cli_logger.verbose(
            cf.dimmed("Use --dump-command-output to "
                      "dump to terminal instead."))

        # stdout and stderr both end up in the same file
        return _run_and_process_output(
            cmd,
            stdout_file=tmp,
            stderr_file=tmp,
            use_login_shells=use_login_shells)
def _format_msg(msg: str, *args: Any, _tags: Dict[str, Any] = None, _numbered: Tuple[str, int, int] = None, _no_format: bool = None, **kwargs: Any): """Formats a message for printing. Renders `msg` using the built-in `str.format` and the passed-in `*args` and `**kwargs`. Args: *args (Any): `.format` arguments for `msg`. _tags (Dict[str, Any]): key-value pairs to display at the end of the message in square brackets. If a tag is set to `True`, it is printed without the value, the presence of the tag treated as a "flag". E.g. `_format_msg("hello", _tags=dict(from=mom, signed=True))` `hello [from=Mom, signed]` _numbered (Tuple[str, int, int]): `(brackets, i, n)` The `brackets` string is composed of two "bracket" characters, `i` is the index, `n` is the total. The string `{i}/{n}` surrounded by the "brackets" is prepended to the message. This is used to number steps in a procedure, with different brackets specifying different major tasks. E.g. `_format_msg("hello", _numbered=("[]", 0, 5))` `[0/5] hello` _no_format (bool): If `_no_format` is `True`, `.format` will not be called on the message. Useful if the output is user-provided or may otherwise contain an unexpected formatting string (e.g. "{}"). Returns: The formatted message. """ if isinstance(msg, str) or isinstance(msg, ColorfulString): tags_str = "" if _tags is not None: tags_list = [] for k, v in _tags.items(): if v is True: tags_list += [k] continue if v is False: continue tags_list += [k + "=" + v] if tags_list: tags_str = cf.reset( cf.dimmed(" [{}]".format(", ".join(tags_list)))) numbering_str = "" if _numbered is not None: chars, i, n = _numbered numbering_str = cf.dimmed(chars[0] + str(i) + "/" + str(n) + chars[1]) + " " if _no_format: # todo: throw if given args/kwargs? 
return numbering_str + msg + tags_str return numbering_str + cf.format(msg, *args, **kwargs) + tags_str if kwargs: raise ValueError("We do not support printing kwargs yet.") res = [msg, *args] res = [str(x) for x in res] return ", ".join(res)
def confirm(self,
            yes: bool,
            msg: str,
            *args: Any,
            _abort: bool = False,
            _default: bool = False,
            **kwargs: Any):
    """Display a confirmation dialog.

    Valid answers are "y/yes/true/1" and "n/no/false/0".

    Args:
        yes (bool): If `yes` is `True` the dialog will default to "yes"
                    and continue without waiting for user input.
        msg (str): Message shown before the prompt; rendered through
                   `_format_msg` with `*args` and `**kwargs`.
        _abort (bool):
            If `_abort` is `True`,
            "no" means aborting the program.
        _default (bool):
            The default action to take if the user just presses enter
            with no input. Also used when input ends (EOF) or the
            prompt is interrupted with Ctrl-C.

    Returns:
        `None` when `self.old_style` is set, otherwise the answer as a
        bool.

    Raises:
        ValueError: When not interactive and `yes` is not set.
        SilentClickException: When the answer is negative and `_abort`
            is `True`.
    """
    if self.old_style:
        return

    should_abort = _abort
    default = _default

    if not self.interactive and not yes:
        # no formatting around --yes here since this is non-interactive
        self.error("This command requires user confirmation. "
                   "When running non-interactively, supply --yes to skip.")
        raise ValueError("Non-interactive confirm without --yes.")

    if default:
        yn_str = "Y/n"
    else:
        yn_str = "y/N"

    confirm_str = cf.underlined("Confirm [" + yn_str + "]:") + " "

    rendered_message = _format_msg(msg, *args, **kwargs)
    # the rendered message ends with ascii coding
    if rendered_message and not msg.endswith("\n"):
        rendered_message += " "

    # Width of the last message line; used to align the re-prompt below.
    msg_len = len(rendered_message.split("\n")[-1])
    complete_str = rendered_message + confirm_str

    if yes:
        self._print(complete_str + "y " +
                    cf.dimmed("[automatic, due to --yes]"))
        return True

    self._print(complete_str, _linefeed=False)

    res = None
    yes_answers = ["y", "yes", "true", "1"]
    no_answers = ["n", "no", "false", "0"]
    try:
        while True:
            ans = sys.stdin.readline()
            if ans == "":
                # EOF (closed stdin, Ctrl-D): readline() keeps returning
                # "" forever, so without this check the loop would spin
                # printing "Invalid answer". Fall back to the default,
                # exactly like the Ctrl-C path below.
                self.newline()
                res = default
                break

            ans = ans.lower()

            if ans == "\n":
                res = default
                break

            ans = ans.strip()
            if ans in yes_answers:
                res = True
                break
            if ans in no_answers:
                res = False
                break

            indent = " " * msg_len
            self.error("{}Invalid answer: {}. "
                       "Expected {} or {}", indent, cf.bold(ans.strip()),
                       self.render_list(yes_answers, "/"),
                       self.render_list(no_answers, "/"))
            self._print(indent + confirm_str, _linefeed=False)
    except KeyboardInterrupt:
        self.newline()
        res = default

    if not res and should_abort:
        # todo: make sure we tell the user if they
        # need to do cleanup
        self._print("Exiting...")
        raise SilentClickException(
            "Exiting due to the response to confirm(should_abort=True).")

    return res
def stop(force, verbose, log_style, log_color):
    """Stop Ray processes manually on the local machine.

    Args:
        force: If truthy, matching processes are killed (`proc.kill()`);
            otherwise they are asked to terminate (`proc.terminate()`).
        verbose: Passed to `cli_logger.configure`; if truthy, each
            matched process is also logged before it is stopped.
        log_style: Passed to `cli_logger.configure`.
        log_color: Passed to `cli_logger.configure`.

    Raises:
        ValueError: If a command-name filter is longer than 15
            characters on Linux.
    """

    cli_logger.configure(log_style, log_color, verbose)

    # Note that raylet needs to exit before object store, otherwise
    # it cannot exit gracefully.
    is_linux = sys.platform.startswith("linux")
    processes_to_kill = [
        # The first element is the substring to filter.
        # The second element, if True, is to filter ps results by command name
        # (only the first 15 characters of the executable name on Linux);
        # if False, is to filter ps results by command with all its arguments.
        # See STANDARD FORMAT SPECIFIERS section of
        # http://man7.org/linux/man-pages/man1/ps.1.html
        # about comm and args. This can help avoid killing non-ray processes.
        # Format:
        # Keyword to filter, filter by command (True)/filter by args (False)
        ["raylet", True],
        ["plasma_store", True],
        ["gcs_server", True],
        ["monitor.py", False],
        ["redis-server", False],
        ["default_worker.py", False],  # Python worker.
        ["ray::", True],  # Python worker. TODO(mehrdadn): Fix for Windows
        ["io.ray.runtime.runner.worker.DefaultWorker", False],  # Java worker.
        ["log_monitor.py", False],
        ["reporter.py", False],
        ["dashboard.py", False],
        ["ray_process_reaper.py", False],
    ]

    # Snapshot name and command line once; processes that raise a
    # psutil error while being inspected are skipped.
    process_infos = []
    for proc in psutil.process_iter(["name", "cmdline"]):
        try:
            process_infos.append((proc, proc.name(), proc.cmdline()))
        except psutil.Error:
            pass

    total_found = 0
    total_stopped = 0
    for keyword, filter_by_cmd in processes_to_kill:
        if filter_by_cmd and is_linux and len(keyword) > 15:
            # getting here is an internal bug, so we do not use cli_logger
            msg = ("The filter string should not be more than {} "
                   "characters. Actual length: {}. Filter: {}").format(
                       15, len(keyword), keyword)
            raise ValueError(msg)

        found = []
        for candidate in process_infos:
            proc, proc_cmd, proc_args = candidate
            corpus = (proc_cmd
                      if filter_by_cmd else subprocess.list2cmdline(proc_args))
            if keyword in corpus:
                found.append(candidate)

        for proc, proc_cmd, proc_args in found:
            total_found += 1

            proc_string = str(subprocess.list2cmdline(proc_args))
            if verbose:
                # `force` kills, otherwise the process is terminated; the
                # label previously had the two cases swapped.
                operation = "Killing" if force else "Terminating"
                # `{}` placeholders, like every other old_* call in this
                # file, so the values are actually substituted.
                cli_logger.old_info(logger, "{} process {}: {}", operation,
                                    proc.pid, proc_string)
            try:
                if force:
                    proc.kill()
                    cli_logger.verbose("Killed `{}` {} ",
                                       cf.bold(proc_string),
                                       cf.dimmed("(via SIGKILL)"))
                else:
                    # TODO(mehrdadn): On Windows, this is forceful termination.
                    # We don't want CTRL_BREAK_EVENT, because that would
                    # terminate the entire process group. What to do?
                    proc.terminate()
                    cli_logger.verbose("Send termination request to `{}` {}",
                                       cf.bold(proc_string),
                                       cf.dimmed("(via SIGTERM)"))

                total_stopped += 1
            except psutil.NoSuchProcess:
                # Not counted as stopped: the process was already gone.
                cli_logger.verbose(
                    "Attempted to stop `{}`, but process was already dead.",
                    cf.bold(proc_string))
            except (psutil.Error, OSError) as ex:
                cli_logger.error("Could not terminate `{}` due to {}",
                                 cf.bold(proc_string), str(ex))
                cli_logger.old_error(logger, "Error: {}", ex)

    if total_found == 0:
        cli_logger.print("Did not find any active Ray processes.")
    else:
        if total_stopped == total_found:
            cli_logger.success("Stopped all {} Ray processes.", total_stopped)
        else:
            cli_logger.warning(
                "Stopped only {} out of {} Ray processes. "
                "Set `{}` to see more details.", total_stopped, total_found,
                cf.bold("-v"))
            cli_logger.warning("Try running the command again, or use `{}`.",
                               cf.bold("--force"))
# Build every module once per distinct module set, then print a summary.
results = []
for module in all_modules:
    banner = "\n###### Check module '{}' ######\n".format(module)
    print(colorful.bold(banner))

    result = Result()
    result.module = module
    result.modules = lbuild_query_modules(module)

    # Look for an earlier result that resolved to the same module list.
    twin = next(
        (earlier for earlier in results
         if result.modules == earlier.modules),
        None)
    if twin is None:
        # Unknown combination of modules, build the source code
        result.success = build(module)
    else:
        # Same combination was already built: reuse its outcome.
        print("Similar to {}".format(twin.module))
        result.similar = twin.module
        result.success = twin.success
    results.append(result)

print(colorful.bold("\n\nResults:\n"))
for result in results:
    if result.success:
        result_output = colorful.green("SUCCESS")
    else:
        result_output = colorful.red("FAIL")
    print("- {} - {}".format(result.module, result_output))

    # Second line: either the module this one duplicates, or its own
    # module list with the "outpost" substring removed from each name.
    if result.similar is None:
        short_names = [x.replace("outpost", "") for x in result.modules]
        detail = " {}".format(", ".join(short_names))
    else:
        detail = " Same modules as {}".format(result.similar)
    print(colorful.dimmed(detail))