Пример #1
0
    def rsync_down(self, source, target):
        cli_logger.old_info(logger, "{}Syncing {} from {}...", self.log_prefix,
                            source, target)

        self.cmd_runner.run_rsync_down(source, target)
        cli_logger.verbose("`rsync`ed {} (remote) to {} (local)",
                           cf.bold(source), cf.bold(target))
Пример #2
0
        def do_sync(remote_path, local_path, allow_non_existing_paths=False):
            if allow_non_existing_paths and not os.path.exists(local_path):
                # Ignore missing source files. In the future we should support
                # the --delete-missing-args command to delete files that have
                # been removed
                return

            assert os.path.exists(local_path), local_path

            if os.path.isdir(local_path):
                if not local_path.endswith("/"):
                    local_path += "/"
                if not remote_path.endswith("/"):
                    remote_path += "/"

            with LogTimer(self.log_prefix +
                          "Synced {} to {}".format(local_path, remote_path)):
                self.cmd_runner.run("mkdir -p {}".format(
                    os.path.dirname(remote_path)),
                                    run_env="host")
                sync_cmd(local_path, remote_path)

                if remote_path not in nolog_paths:
                    # todo: timed here?
                    cli_logger.print("{} from {}", cf.bold(remote_path),
                                     cf.bold(local_path))
Пример #3
0
        def do_sync(remote_path, local_path, allow_non_existing_paths=False):
            if allow_non_existing_paths and not os.path.exists(local_path):
                cli_logger.print("sync: {} does not exist. Skipping.",
                                 local_path)
                # Ignore missing source files. In the future we should support
                # the --delete-missing-args command to delete files that have
                # been removed
                return

            assert os.path.exists(local_path), local_path

            if os.path.isdir(local_path):
                if not local_path.endswith("/"):
                    local_path += "/"
                if not remote_path.endswith("/"):
                    remote_path += "/"

            with LogTimer(self.log_prefix +
                          "Synced {} to {}".format(local_path, remote_path)):
                is_docker = (self.docker_config
                             and self.docker_config["container_name"] != "")
                if not is_docker:
                    # The DockerCommandRunner handles this internally.
                    self.cmd_runner.run("mkdir -p {}".format(
                        os.path.dirname(remote_path)),
                                        run_env="host")
                sync_cmd(local_path, remote_path, file_mount=True)

                if remote_path not in nolog_paths:
                    # todo: timed here?
                    cli_logger.print("{} from {}", cf.bold(remote_path),
                                     cf.bold(local_path))
Пример #4
0
        def remaining_nodes():
            workers = provider.non_terminated_nodes(
                {TAG_RAY_NODE_KIND: NODE_KIND_WORKER})

            if keep_min_workers:
                min_workers = config.get("min_workers", 0)

                cli_logger.print(
                    "{} random worker nodes will not be shut down. " +
                    cf.dimmed("(due to {})"), cf.bold(min_workers),
                    cf.bold("--keep-min-workers"))
                cli_logger.old_info(logger,
                                    "teardown_cluster: Keeping {} nodes...",
                                    min_workers)

                workers = random.sample(workers, len(workers) - min_workers)

            # todo: it's weird to kill the head node but not all workers
            if workers_only:
                cli_logger.print(
                    "The head node will not be shut down. " +
                    cf.dimmed("(due to {})"), cf.bold("--workers-only"))

                return workers

            head = provider.non_terminated_nodes(
                {TAG_RAY_NODE_KIND: NODE_KIND_HEAD})

            return head + workers
Пример #5
0
    def sync_file_mounts(self, sync_cmd):
        nolog_paths = []
        if cli_logger.verbosity == 0:
            nolog_paths = [
                "~/ray_bootstrap_key.pem", "~/ray_bootstrap_config.yaml"
            ]

        # Rsync file mounts
        with cli_logger.group("Processing file mounts",
                              _numbered=("[]", 2, 5)):
            for remote_path, local_path in self.file_mounts.items():
                assert os.path.exists(local_path), local_path
                if os.path.isdir(local_path):
                    if not local_path.endswith("/"):
                        local_path += "/"
                    if not remote_path.endswith("/"):
                        remote_path += "/"

                with LogTimer(
                        self.log_prefix +
                        "Synced {} to {}".format(local_path, remote_path)):
                    self.cmd_runner.run("mkdir -p {}".format(
                        os.path.dirname(remote_path)))
                    sync_cmd(local_path, remote_path)

                    if remote_path not in nolog_paths:
                        # todo: timed here?
                        cli_logger.print("{} from {}", cf.bold(remote_path),
                                         cf.bold(local_path))
Пример #6
0
    def run(self,
            cmd,
            timeout=120,
            exit_on_fail=False,
            port_forward=None,
            with_output=False,
            ssh_options_override=None,
            **kwargs):
        ssh_options = ssh_options_override or self.ssh_options

        assert isinstance(
            ssh_options, SSHOptions
        ), "ssh_options must be of type SSHOptions, got {}".format(
            type(ssh_options))

        self._set_ssh_ip_if_required()

        if is_using_login_shells():
            ssh = ["ssh", "-tt"]
        else:
            ssh = ["ssh"]

        if port_forward:
            with cli_logger.group("Forwarding ports"):
                if not isinstance(port_forward, list):
                    port_forward = [port_forward]
                for local, remote in port_forward:
                    cli_logger.verbose(
                        "Forwarding port {} to port {} on localhost.",
                        cf.bold(local), cf.bold(remote))  # todo: msg
                    cli_logger.old_info(logger,
                                        "{}Forwarding {} -> localhost:{}",
                                        self.log_prefix, local, remote)
                    ssh += ["-L", "{}:localhost:{}".format(remote, local)]

        final_cmd = ssh + ssh_options.to_ssh_options_list(
            timeout=timeout) + ["{}@{}".format(self.ssh_user, self.ssh_ip)]
        if cmd:
            if is_using_login_shells():
                final_cmd += _with_interactive(cmd)
            else:
                final_cmd += [cmd]
            cli_logger.old_info(logger, "{}Running {}", self.log_prefix,
                                " ".join(final_cmd))
        else:
            # We do this because `-o ControlMaster` causes the `-N` flag to
            # still create an interactive shell in some ssh versions.
            final_cmd.append(quote("while true; do sleep 86400; done"))

        cli_logger.verbose("Running `{}`", cf.bold(cmd))
        with cli_logger.indented():
            cli_logger.very_verbose("Full command is `{}`",
                                    cf.bold(" ".join(final_cmd)))

        if cli_logger.verbosity > 0:
            with cli_logger.indented():
                return self._run_helper(final_cmd, with_output, exit_on_fail)
        else:
            return self._run_helper(final_cmd, with_output, exit_on_fail)
Пример #7
0
    def rsync_down(self, source, target, file_mount=False):
        cli_logger.old_info(logger, "{}Syncing {} from {}...", self.log_prefix,
                            source, target)

        options = {}
        options["file_mount"] = file_mount
        self.cmd_runner.run_rsync_down(source, target, options=options)
        cli_logger.verbose("`rsync`ed {} (remote) to {} (local)",
                           cf.bold(source), cf.bold(target))
Пример #8
0
def _bootstrap_config(config: Dict[str, Any],
                      no_config_cache: bool = False) -> Dict[str, Any]:
    config = prepare_config(config)

    hasher = hashlib.sha1()
    hasher.update(json.dumps([config], sort_keys=True).encode("utf-8"))
    cache_key = os.path.join(tempfile.gettempdir(),
                             "ray-config-{}".format(hasher.hexdigest()))

    if os.path.exists(cache_key) and not no_config_cache:
        cli_logger.old_info(logger, "Using cached config at {}", cache_key)

        config_cache = json.loads(open(cache_key).read())
        if config_cache.get("_version", -1) == CONFIG_CACHE_VERSION:
            # todo: is it fine to re-resolve? afaik it should be.
            # we can have migrations otherwise or something
            # but this seems overcomplicated given that resolving is
            # relatively cheap
            try_reload_log_state(config_cache["config"]["provider"],
                                 config_cache.get("provider_log_info"))
            cli_logger.verbose("Loaded cached config from " + cf.bold("{}"),
                               cache_key)

            return config_cache["config"]
        else:
            cli_logger.warning(
                "Found cached cluster config "
                "but the version " + cf.bold("{}") + " "
                "(expected " + cf.bold("{}") + ") does not match.\n"
                "This is normal if cluster launcher was updated.\n"
                "Config will be re-resolved.",
                config_cache.get("_version", "none"), CONFIG_CACHE_VERSION)
    validate_config(config)

    importer = NODE_PROVIDERS.get(config["provider"]["type"])
    if not importer:
        raise NotImplementedError("Unsupported provider {}".format(
            config["provider"]))

    provider_cls = importer(config["provider"])

    with cli_logger.timed(  # todo: better message
            "Bootstraping {} config",
            PROVIDER_PRETTY_NAMES.get(config["provider"]["type"])):
        resolved_config = provider_cls.bootstrap_config(config)

    if not no_config_cache:
        with open(cache_key, "w") as f:
            config_cache = {
                "_version": CONFIG_CACHE_VERSION,
                "provider_log_info": try_get_log_state(config["provider"]),
                "config": resolved_config
            }
            f.write(json.dumps(config_cache))
    return resolved_config
Пример #9
0
    def __log_room(platform, category, game, name, room, live, desc):
        print(platform, end="  ")

        print("%s >" % category, colorful.bold(game))
        print(" ", colorful.bold(name), "(%s)" % room, end=" ")
        if live:
            print(colorful.green("● 正在直播"))
        else:
            print(colorful.dimGray("● 未直播"))
        print(desc)
        print()
Пример #10
0
 def handle_cli_override(key, override):
     if override is not None:
         if key in config:
             nonlocal printed_overrides
             printed_overrides = True
             cli_logger.warning(
                 "`{}` override provided on the command line.\n"
                 "  Using " + cf.bold("{}") +
                 cf.dimmed(" [configuration file has " + cf.bold("{}") +
                           "]"), key, override, config[key])
         config[key] = override
Пример #11
0
def init(db: str = None, schema: str = "schema.sql", overwrite: bool = False):
    """
    Create an initial schema SQL file, optionally from an existing database.
    :param db: An optional database to create the schema from.
    :param schema: An optional file to write schema to. Default: schema.sql
    :param overwrite: Overwrite existing file.
    """
    if os.path.exists(schema) and not overwrite:
        print(cf.bold_red("Error:"),
              f'File "{schema}" already exists.',
              file=sys.stderr)
        print("Run again with", cf.bold("--overwrite"), "to replace.")
        sys.exit(os.EX_OSFILE)

    if not db:
        with open(schema, "w") as f:
            f.write("")

        print(cf.bold("All done! ✨"))
        print(f'Created blank file "{schema}"')
        sys.exit()

    base_uri = copy_url(db)
    target_exists = database_exists(base_uri, test_can_select=True)

    if not target_exists:
        print(
            cf.bold_red("Error:"),
            f'Database "{base_uri.database}" does not exist.',
        )
        sys.exit(os.EX_NOHOST)

    sql = ""

    patch = create_admin_patch(base_uri)
    patch.start()

    with temporary_database(base_uri) as sTemp, S(db) as sFrom:
        # Compare
        m = Migration(sTemp, sFrom)
        m.add_all_changes()
        m.set_safety(False)

        # Get SQL
        sql = m.sql

    with open(schema, "wb") as f:
        f.write(pg_format(sql.encode(), unquote=False))

    print(cf.bold("All done! ✨"))
    print(f'Created file "{schema}" with schema from "{base_uri.database}"')
    sys.exit()
Пример #12
0
    def run(self):
        cli_logger.old_info(logger, "{}Updating to {}", self.log_prefix,
                            self.runtime_hash)

        try:
            with LogTimer(self.log_prefix +
                          "Applied config {}".format(self.runtime_hash)):
                self.do_update()
        except Exception as e:
            error_str = str(e)
            if hasattr(e, "cmd"):
                error_str = "(Exit Status {}) {}".format(
                    e.returncode, " ".join(e.cmd))

            self.provider.set_node_tags(
                self.node_id, {TAG_RAY_NODE_STATUS: STATUS_UPDATE_FAILED})
            cli_logger.error("New status: {}", cf.bold(STATUS_UPDATE_FAILED))

            cli_logger.old_error(logger, "{}Error executing: {}\n",
                                 self.log_prefix, error_str)

            cli_logger.error("!!!")
            if hasattr(e, "cmd"):
                cli_logger.error(
                    "Setup command `{}` failed with exit code {}. stderr:",
                    cf.bold(e.cmd), e.returncode)
            else:
                cli_logger.verbose_error("{}", str(vars(e)))
                # todo: handle this better somehow?
                cli_logger.error("{}", str(e))
            # todo: print stderr here
            cli_logger.error("!!!")
            cli_logger.newline()

            if isinstance(e, click.ClickException):
                # todo: why do we ignore this here
                return
            raise

        tags_to_set = {
            TAG_RAY_NODE_STATUS: STATUS_UP_TO_DATE,
            TAG_RAY_RUNTIME_CONFIG: self.runtime_hash,
        }
        if self.file_mounts_contents_hash is not None:
            tags_to_set[
                TAG_RAY_FILE_MOUNTS_CONTENTS] = self.file_mounts_contents_hash

        self.provider.set_node_tags(self.node_id, tags_to_set)
        cli_logger.labeled_value("New status", STATUS_UP_TO_DATE)

        self.exitcode = 0
Пример #13
0
    def wait_ready(self, deadline):
        with cli_logger.group("Waiting for SSH to become available",
                              _numbered=("[]", 1, 6)):
            with LogTimer(self.log_prefix + "Got remote shell"):
                cli_logger.old_info(logger, "{}Waiting for remote shell...",
                                    self.log_prefix)

                cli_logger.print("Running `{}` as a test.", cf.bold("uptime"))
                first_conn_refused_time = None
                while time.time() < deadline and \
                        not self.provider.is_terminated(self.node_id):
                    try:
                        cli_logger.old_debug(logger,
                                             "{}Waiting for remote shell...",
                                             self.log_prefix)

                        # Run outside of the container
                        self.cmd_runner.run("uptime", run_env="host")
                        cli_logger.old_debug(logger, "Uptime succeeded.")
                        cli_logger.success("Success.")
                        return True
                    except ProcessRunnerError as e:
                        first_conn_refused_time = \
                            cmd_output_util.handle_ssh_fails(
                                e, first_conn_refused_time,
                                retry_interval=READY_CHECK_INTERVAL)
                        time.sleep(READY_CHECK_INTERVAL)
                    except Exception as e:
                        # TODO(maximsmol): we should not be ignoring
                        # exceptions if they get filtered properly
                        # (new style log + non-interactive shells)
                        #
                        # however threading this configuration state
                        # is a pain and I'm leaving it for later

                        retry_str = str(e)
                        if hasattr(e, "cmd"):
                            retry_str = "(Exit Status {}): {}".format(
                                e.returncode, " ".join(e.cmd))

                        cli_logger.print(
                            "SSH still not available {}, "
                            "retrying in {} seconds.", cf.dimmed(retry_str),
                            cf.bold(str(READY_CHECK_INTERVAL)))
                        cli_logger.old_debug(logger,
                                             "{}Node not up, retrying: {}",
                                             self.log_prefix, retry_str)

                        time.sleep(READY_CHECK_INTERVAL)

        assert False, "Unable to connect to node"
Пример #14
0
def apply_statements(statements: str, db: str):
    """
    Apply a file of SQL statements to a database.
    :param statements: An SQL file of statements to apply to the database.
    :param db: A database to target.
    """
    if not os.path.exists(statements):
        print(
            cf.bold_red("Error:"),
            f'Could not find file "{statements}"',
            file=sys.stderr,
        )
        sys.exit(os.EX_OSFILE)

    base_uri = copy_url(db)
    target_exists = database_exists(base_uri, test_can_select=True)

    if not target_exists:
        print(
            cf.bold_red("Error:"),
            f'Database "{base_uri.database}" does not exist.',
        )
        sys.exit(os.EX_NOHOST)

    with S(db) as s:
        try:
            load_sql_from_file(s, statements)
        except Exception as e:
            print(cf.bold_red("Error:"), e, file=sys.stderr)
            sys.exit(os.EX_DATAERR)

    print(cf.bold("All done! ✨"))
Пример #15
0
def eval_cli_command(is_debug_mode, path_to_embedding, tasks_path, tasks):
    """Evaluate and generate reports for a NLP Word Embedding (default command)

    The Word Embeddings need to be provided as word2vec keyed vectors in a file.
    The file can either be in a binary format (if the file has the .bin) extension
    or in plain text (if the file has the .vec) extension.
    """
    # load the Word Embedding
    print(cf.italic(f"Loading embedding {path_to_embedding} ..."),
          flush=True,
          end=" ")
    is_binary_format = path_to_embedding.suffix == ".bin"
    try:
        embedding = load_embedding(path_to_embedding, binary=is_binary_format)
    except EmbedevalError as exc:
        print(cf.bold_firebrick("[FAILED]"), flush=True, end="\n\n")
        print(f"{cf.bold_firebrick('Error:')} {cf.firebrick(exc)}",
              file=sys.stderr)
        raise click.Abort()
    else:
        print(cf.bold("[OK]"), flush=True, end="\n\n")

    # evaluate all tasks
    logger.debug("Evaluating %d Tasks ...", len(tasks))
    for task_nbr, task in enumerate(tasks, start=1):
        logger.debug("Evaluating Task %s ...", task.NAME)
        try:
            report = task.evaluate(embedding)
            print(report, end="\n\n", flush=True)
        except Exception as exc:
            print(cf.firebrick(f"Failed to evaluate task: {exc}"),
                  end="\n\n",
                  flush=True)
        logger.debug("Evaluated %d of %d Tasks", task_nbr, len(tasks))
Пример #16
0
def show():
    """
    Show the modifiers and colors
    """
    # modifiers
    sys.stdout.write(colorful.bold('bold') + ' ')
    sys.stdout.write(colorful.dimmed('dimmed') + ' ')
    sys.stdout.write(colorful.italic('italic') + ' ')
    sys.stdout.write(colorful.underlined('underlined') + ' ')
    sys.stdout.write(colorful.inversed('inversed') + ' ')
    sys.stdout.write(colorful.concealed('concealed') + ' ')
    sys.stdout.write(colorful.struckthrough('struckthrough') + '\n')

    # foreground colors
    sys.stdout.write(colorful.red('red') + ' ')
    sys.stdout.write(colorful.green('green') + ' ')
    sys.stdout.write(colorful.yellow('yellow') + ' ')
    sys.stdout.write(colorful.blue('blue') + ' ')
    sys.stdout.write(colorful.magenta('magenta') + ' ')
    sys.stdout.write(colorful.cyan('cyan') + ' ')
    sys.stdout.write(colorful.white('white') + '\n')

    # background colors
    sys.stdout.write(colorful.on_red('red') + ' ')
    sys.stdout.write(colorful.on_green('green') + ' ')
    sys.stdout.write(colorful.on_yellow('yellow') + ' ')
    sys.stdout.write(colorful.on_blue('blue') + ' ')
    sys.stdout.write(colorful.on_magenta('magenta') + ' ')
    sys.stdout.write(colorful.on_cyan('cyan') + ' ')
    sys.stdout.write(colorful.on_white('white') + '\n')
Пример #17
0
    def _make_request(self, query_stmt):
        payload = {"nrql": query_stmt}
        req = requests.get(
            self._url % self.account_id,
            headers={"X-Query-Key": self.api_key},
            params=payload,
        )
        if self.verbose:
            print(colorful.bold("Request URL: %s" % req.url))
            print(colorful.bold("Status Code: %s" % req.status_code))
        response = req.json()
        self._print_messages(response)
        if not self.verbose:
            response.pop("metadata", None)
            response.pop("performanceStats", None)

        return response
Пример #18
0
 def handle_yaml_error(e):
     cli_logger.error(
         "Cluster config invalid.\n"
         "Failed to load YAML file " + cf.bold("{}"), config_file)
     cli_logger.newline()
     with cli_logger.verbatim_error_ctx("PyYAML error:"):
         cli_logger.error(e)
     cli_logger.abort()
Пример #19
0
def handle_ssh_fails(e, first_conn_refused_time, retry_interval):
    """Handle SSH system failures coming from a subprocess.

    Args:
        e: The `ProcessRunnerException` to handle.
        first_conn_refused_time:
            The time (as reported by this function) or None,
            indicating the last time a CONN_REFUSED error was caught.

            After exceeding a patience value, the program will be aborted
            since SSH will likely never recover.
        retry_interval: The interval after which the command will be retried,
                        used here just to inform the user.
    """
    if e.msg_type != "ssh_command_failed":
        return

    if e.special_case == "ssh_conn_refused":
        if first_conn_refused_time is not None and \
            time.time() - first_conn_refused_time > \
                CONN_REFUSED_PATIENCE:
            cli_logger.error(
                "SSH connection was being refused "
                "for {} seconds. Head node assumed "
                "unreachable.", cf.bold(str(CONN_REFUSED_PATIENCE)))
            cli_logger.abort("Check the node's firewall settings "
                             "and the cloud network configuration.")

        cli_logger.warning("SSH connection was refused.")
        cli_logger.warning("This might mean that the SSH daemon is "
                           "still setting up, or that "
                           "the host is inaccessable (e.g. due to "
                           "a firewall).")

        return time.time()

    if e.special_case in ["ssh_timeout", "ssh_conn_refused"]:
        cli_logger.print("SSH still not available, "
                         "retrying in {} seconds.",
                         cf.bold(str(retry_interval)))
    else:
        raise e

    return first_conn_refused_time
Пример #20
0
    def labeled_value(self, key, msg, *args, **kwargs):
        """Displays a key-value pair with special formatting.

        Args:
            key (str): Label that is prepended to the message.

        For other arguments, see `_format_msg`.
        """
        self._print(
            cf.cyan(key) + ": " + _format_msg(cf.bold(msg), *args, **kwargs))
Пример #21
0
 def run_rsync_up(self, source, target):
     self._set_ssh_ip_if_required()
     command = [
         "rsync", "--rsh",
         subprocess.list2cmdline(["ssh"] +
                                 self.ssh_options.to_ssh_options_list(
                                     timeout=120)), "-avz", source,
         "{}@{}:{}".format(self.ssh_user, self.ssh_ip, target)
     ]
     cli_logger.verbose("Running `{}`", cf.bold(" ".join(command)))
     self._run_helper(command, silent=is_rsync_silent())
Пример #22
0
    def run_rsync_down(self, source, target):
        self._set_ssh_ip_if_required()

        command = [
            "rsync", "--rsh",
            subprocess.list2cmdline(["ssh"] +
                                    self.ssh_options.to_ssh_options_list(
                                        timeout=120)), "-avz",
            "{}@{}:{}".format(self.ssh_user, self.ssh_ip, source), target
        ]
        cli_logger.verbose("Running `{}`", cf.bold(" ".join(command)))
        self.process_runner.check_call(command)
Пример #23
0
def _get_key(key_name, config):
    ec2 = _resource("ec2", config)
    try:
        for key in ec2.key_pairs.filter(Filters=[{
                "Name": "key-name",
                "Values": [key_name]
        }]):
            if key.name == key_name:
                return key
    except botocore.exceptions.ClientError as exc:
        handle_boto_error(exc, "Failed to fetch EC2 key pair {} from AWS.",
                          cf.bold(key_name))
        raise exc
Пример #24
0
def _get_role(role_name, config):
    iam = _resource("iam", config)
    role = iam.Role(role_name)
    try:
        role.load()
        return role
    except botocore.exceptions.ClientError as exc:
        if exc.response.get("Error", {}).get("Code") == "NoSuchEntity":
            return None
        else:
            handle_boto_error(
                exc, "Failed to fetch IAM role data for {} from AWS.",
                cf.bold(role_name))
            raise exc
Пример #25
0
 def _multiple_account_handler(self):
     if self.environment is None:
         nr_api_key = os.environ.get("NR_API_KEY")
         nr_account_id = os.environ.get("NR_ACCOUNT_ID")
     else:
         nr_api_key = os.environ.get("NR_API_KEY_%s" %
                                     self.environment.upper())
         nr_account_id = os.environ.get("NR_ACCOUNT_ID_%s" %
                                        self.environment.upper())
         if not nr_account_id or not nr_api_key:
             print(
                 colorful.bold("%s account environment variables not set." %
                               self.environment))
     return nr_api_key, nr_account_id
Пример #26
0
    def labeled_value(self, key: str, msg: str, *args: Any, **kwargs: Any):
        """Displays a key-value pair with special formatting.

        Args:
            key (str): Label that is prepended to the message.

        For other arguments, see `_format_msg`.
        """
        if self.old_style:
            return

        self._print(
            cf.skyBlue(key) + ": " +
            _format_msg(cf.bold(msg), *args, **kwargs))
Пример #27
0
def up(cluster_config_file, min_workers, max_workers, no_restart, restart_only,
       yes, cluster_name, no_config_cache, redirect_command_output,
       use_login_shells, log_style, log_color, verbose):
    """Create or update a Ray cluster."""
    cli_logger.configure(log_style, log_color, verbose)

    if restart_only or no_restart:
        cli_logger.doassert(restart_only != no_restart,
                            "`{}` is incompatible with `{}`.",
                            cf.bold("--restart-only"), cf.bold("--no-restart"))
        assert restart_only != no_restart, "Cannot set both 'restart_only' " \
            "and 'no_restart' at the same time!"

    if urllib.parse.urlparse(cluster_config_file).scheme in ("http", "https"):
        try:
            response = urllib.request.urlopen(cluster_config_file, timeout=5)
            content = response.read()
            file_name = cluster_config_file.split("/")[-1]
            with open(file_name, "wb") as f:
                f.write(content)
            cluster_config_file = file_name
        except urllib.error.HTTPError as e:
            cli_logger.warning("{}", str(e))
            cli_logger.warning(
                "Could not download remote cluster configuration file.")
            cli_logger.old_info(logger, "Error downloading file: ", e)
    create_or_update_cluster(
        config_file=cluster_config_file,
        override_min_workers=min_workers,
        override_max_workers=max_workers,
        no_restart=no_restart,
        restart_only=restart_only,
        yes=yes,
        override_cluster_name=cluster_name,
        no_config_cache=no_config_cache,
        redirect_command_output=redirect_command_output,
        use_login_shells=use_login_shells)
Пример #28
0
def kill_node(config_file, yes, hard, override_cluster_name):
    """Kills a random Raylet worker."""

    config = yaml.safe_load(open(config_file).read())
    if override_cluster_name is not None:
        config["cluster_name"] = override_cluster_name
    config = _bootstrap_config(config)

    cli_logger.confirm(yes, "A random node will be killed.")
    cli_logger.old_confirm("This will kill a node in your cluster", yes)

    provider = get_node_provider(config["provider"], config["cluster_name"])
    try:
        nodes = provider.non_terminated_nodes({
            TAG_RAY_NODE_KIND: NODE_KIND_WORKER
        })
        node = random.choice(nodes)
        cli_logger.print("Shutdown " + cf.bold("{}"), node)
        cli_logger.old_info(logger, "kill_node: Shutdown worker {}", node)
        if hard:
            provider.terminate_node(node)
        else:
            updater = NodeUpdaterThread(
                node_id=node,
                provider_config=config["provider"],
                provider=provider,
                auth_config=config["auth"],
                cluster_name=config["cluster_name"],
                file_mounts=config["file_mounts"],
                initialization_commands=[],
                setup_commands=[],
                ray_start_commands=[],
                runtime_hash="",
                file_mounts_contents_hash="",
                is_head_node=False,
                docker_config=config.get("docker"))

            _exec(updater, "ray stop", False, False)

        time.sleep(5)

        if config.get("provider", {}).get("use_internal_ips", False) is True:
            node_ip = provider.internal_ip(node)
        else:
            node_ip = provider.external_ip(node)
    finally:
        provider.cleanup()

    return node_ip
Пример #29
0
    def wait_ready(self, deadline):
        with cli_logger.group(
                "Waiting for SSH to become available", _numbered=("[]", 1, 6)):
            with LogTimer(self.log_prefix + "Got remote shell"):
                cli_logger.old_info(logger, "{}Waiting for remote shell...",
                                    self.log_prefix)

                cli_logger.print("Running `{}` as a test.", cf.bold("uptime"))
                while time.time() < deadline and \
                        not self.provider.is_terminated(self.node_id):
                    try:
                        cli_logger.old_debug(logger,
                                             "{}Waiting for remote shell...",
                                             self.log_prefix)

                        self.cmd_runner.run("uptime")
                        cli_logger.old_debug(logger, "Uptime succeeded.")
                        cli_logger.success("Success.")
                        return True
                    except Exception as e:
                        retry_str = str(e)
                        if hasattr(e, "cmd"):
                            retry_str = "(Exit Status {}): {}".format(
                                e.returncode, " ".join(e.cmd))

                        cli_logger.print(
                            "SSH still not available {}, "
                            "retrying in {} seconds.", cf.gray(retry_str),
                            cf.bold(str(READY_CHECK_INTERVAL)))
                        cli_logger.old_debug(logger,
                                             "{}Node not up, retrying: {}",
                                             self.log_prefix, retry_str)

                        time.sleep(READY_CHECK_INTERVAL)

        assert False, "Unable to connect to node"
Пример #30
0
def run_cmd_redirected(cmd,
                       process_runner=subprocess,
                       silent=False,
                       use_login_shells=False):
    """Run a command and optionally redirect output to a file.

    Args:
        cmd (List[str]): Command to run.
        process_runner: Process runner used for executing commands.
        silent (bool): If true, the command output will be silenced completely
                       (redirected to /dev/null), unless verbose logging
                       is enabled. Use this for runnign utility commands like
                       rsync.
    """
    if silent and cli_logger.verbosity < 1:
        return _run_and_process_output(
            cmd,
            process_runner=process_runner,
            stdout_file=None,
            use_login_shells=use_login_shells)

    if not is_output_redirected():
        return _run_and_process_output(
            cmd,
            process_runner=process_runner,
            stdout_file=sys.stdout,
            use_login_shells=use_login_shells)
    else:
        tmpfile_path = os.path.join(
            tempfile.gettempdir(), "ray-up-{}-{}.txt".format(
                cmd[0], time.time()))
        with open(
                tmpfile_path,
                mode="w",
                # line buffering
                buffering=1) as tmp:
            cli_logger.verbose("Command stdout is redirected to {}",
                               cf.bold(tmp.name))

            return _run_and_process_output(
                cmd,
                process_runner=process_runner,
                stdout_file=tmp,
                stderr_file=tmp,
                use_login_shells=use_login_shells)