Exemplo n.º 1
0
def submit(cluster_config_file, docker, screen, tmux, stop, start,
           cluster_name, port_forward, script, args):
    """Upload a script to the cluster and run it there with ``python``.

    Before it is run, the script is synced to:

        os.path.join("~", os.path.basename(script))

    When ``start`` is truthy, ``create_or_update_cluster`` is called first.
    When ``args`` is not None it is appended to the command line as a single
    space-separated token.

    Example:
        >>> ray submit [CLUSTER.YAML] experiment.py --args="--smoke-test"

    Raises:
        AssertionError: if both ``screen`` and ``tmux`` are truthy.
    """
    assert not (screen and tmux), "Can specify only one of `screen` or `tmux`."

    if start:
        create_or_update_cluster(
            cluster_config_file, None, None, False, False, True, cluster_name)

    # Only the file name is kept; the script always lands directly under "~".
    remote_script = os.path.join("~", os.path.basename(script))
    rsync(cluster_config_file, script, remote_script, cluster_name, down=False)

    extra_tokens = [] if args is None else [args]
    remote_cmd = " ".join(["python", remote_script] + extra_tokens)
    exec_cluster(
        cluster_config_file, remote_cmd, docker, screen, tmux, stop, False,
        cluster_name, port_forward)
Exemplo n.º 2
0
def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,
           port_forward, script, script_args):
    """Upload a script to the cluster, run it, and log how to re-attach.

    The script is synced to the following location before it is run:

        os.path.join("~", os.path.basename(script))

    When ``start`` is truthy, ``create_or_update_cluster`` is called first.
    When the command is run under ``tmux`` or ``screen``, an info message
    with the matching ``ray attach`` command line is logged afterwards.

    Raises:
        AssertionError: if both ``screen`` and ``tmux`` are truthy.
    """
    assert not (screen and tmux), "Can specify only one of `screen` or `tmux`."

    if start:
        create_or_update_cluster(
            cluster_config_file, None, None, False, False, True, cluster_name)

    remote_script = os.path.join("~", os.path.basename(script))
    rsync(cluster_config_file, script, remote_script, cluster_name, down=False)

    command_tokens = ["python", remote_script] + list(script_args)
    exec_cluster(
        cluster_config_file, " ".join(command_tokens), screen, tmux, stop,
        False, cluster_name, port_forward)

    # Without a terminal multiplexer there is nothing to attach to.
    if not (tmux or screen):
        return

    attach_tokens = ["ray attach", cluster_config_file]
    if cluster_name is not None:
        attach_tokens.append("--cluster-name={}".format(cluster_name))
    # Exactly one of the two is truthy here (the assert above rules out both).
    attach_tokens.append("--tmux" if tmux else "--screen")

    logger.info("Use `{}` to check on command status.".format(
        " ".join(attach_tokens)))
Exemplo n.º 3
0
 def __do_spawn(self):
     """Create or update the cluster and record the outcome on this object.

     On success the config file is read back into ``self.config`` and
     ``self.ready`` is set to True. On any ``BaseException`` the error is
     wrapped in ``CannotSpawnCluster`` and stored on ``self.spawner.exc``;
     unless ``self.spawner.silent`` is truthy, the traceback is also written
     to stderr. The exception is not re-raised.
     """
     try:
         create_or_update_cluster(
             self.config_file,
             override_min_workers=None,
             override_max_workers=None,
             no_restart=False,
             restart_only=False,
             yes=True,
             override_cluster_name=None,
             no_config_cache=False,
             log_old_style=False,
             log_color="auto",
             verbose=1,
         )
         # create_or_update_cluster() modifies the config, so load it again
         with open(self.config_file) as config_stream:
             raw_config = config_stream.read()
             self.config = yaml.safe_load(raw_config)
         self.ready = True
     except BaseException as error:
         # Format once; the same text goes to the stored error and to stderr.
         trace_text = traceback.format_exc()
         self.spawner.exc = CannotSpawnCluster(
             "Cannot spawn cluster", cause=error, traceback=trace_text)
         if self.spawner.silent:
             return
         sys.stderr.write(f"Cannot spawn cluster:\n{trace_text}\n")
Exemplo n.º 4
0
def create_or_update(cluster_config_file, min_workers, max_workers, no_restart,
                     restart_only, yes, cluster_name):
    """Check the restart flags, then hand off to ``create_or_update_cluster``.

    All arguments are forwarded positionally and unchanged.

    Raises:
        AssertionError: if at least one of ``restart_only`` / ``no_restart``
            is truthy and the two compare equal (i.e. both were set).
    """
    any_restart_flag = restart_only or no_restart
    if any_restart_flag:
        assert restart_only != no_restart, (
            "Cannot set both 'restart_only' "
            "and 'no_restart' at the same time!")

    create_or_update_cluster(
        cluster_config_file, min_workers, max_workers, no_restart,
        restart_only, yes, cluster_name)
Exemplo n.º 5
0
def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,
           port_forward, script, script_args):
    """Sync a script to the cluster, execute it, and hint how to re-attach.

    The script is copied to this location before it is run:

        os.path.join("~", os.path.basename(script))

    ``create_or_update_cluster`` is called first when ``start`` is truthy.
    If ``tmux`` or ``screen`` is requested, the ``ray attach`` command line
    for checking on the job is logged at info level once the command has
    been dispatched.

    Raises:
        AssertionError: if both ``screen`` and ``tmux`` are truthy.
    """
    assert not (screen and tmux), "Can specify only one of `screen` or `tmux`."

    if start:
        create_or_update_cluster(
            cluster_config_file, None, None, False, False, True, cluster_name)

    destination = os.path.join("~", os.path.basename(script))
    rsync(cluster_config_file, script, destination, cluster_name, down=False)

    run_command = " ".join(["python", destination] + list(script_args))
    exec_cluster(
        cluster_config_file, run_command, screen, tmux, stop, False,
        cluster_name, port_forward)

    if tmux or screen:
        hint_parts = ["ray attach", cluster_config_file]
        if cluster_name is not None:
            hint_parts.append("--cluster-name={}".format(cluster_name))
        # The assert above guarantees the two flags are not both truthy.
        multiplexer_flag = "--tmux" if tmux else "--screen"
        hint_parts.append(multiplexer_flag)

        hint = "Use `{}` to check on command status.".format(
            " ".join(hint_parts))
        logger.info(hint)
Exemplo n.º 6
0
def create_or_update(cluster_config_file, min_workers, max_workers, no_restart,
                     restart_only, yes, cluster_name):
    """Create or update a Ray cluster.

    The restart flags are checked first; every argument is then forwarded
    positionally to ``create_or_update_cluster``.

    Raises:
        AssertionError: if at least one of ``restart_only`` / ``no_restart``
            is truthy and the two compare equal (i.e. both were set).
    """
    restart_flag_given = restart_only or no_restart
    if restart_flag_given:
        conflict_message = ("Cannot set both 'restart_only' "
                            "and 'no_restart' at the same time!")
        assert restart_only != no_restart, conflict_message

    create_or_update_cluster(
        cluster_config_file,
        min_workers,
        max_workers,
        no_restart,
        restart_only,
        yes,
        cluster_name)
Exemplo n.º 7
0
def start(command, args):
    """Start a session: create the cluster, sync the repo, set up, and run.

    Args:
        command: Name of the project command to run. When falsy, the
            command named "default" is looked up instead.
        args: Passed to ``_get_command_to_run`` together with the command
            name and the project definition.

    Raises:
        click.ClickException: If the project environment contains a
            "dockerfile" or "dockerimage" entry, which is not supported.
    """
    project_definition = load_project_or_throw()

    command_to_run = _get_command_to_run(command or "default",
                                         project_definition, args)

    # Check for features we don't support right now
    project_environment = project_definition["environment"]
    need_docker = ("dockerfile" in project_environment
                   or "dockerimage" in project_environment)
    if need_docker:
        # Each fragment ends with a space: adjacent literals are joined
        # verbatim, and a missing space would glue "at" onto the URL.
        raise click.ClickException(
            "Docker support in session is currently not implemented. "
            "Please file a feature request at "
            "https://github.com/ray-project/ray/issues")

    cluster_yaml = project_definition["cluster"]
    working_directory = project_definition["name"]

    logger.info("[1/4] Creating cluster")
    create_or_update_cluster(
        config_file=cluster_yaml,
        override_min_workers=None,
        override_max_workers=None,
        no_restart=False,
        restart_only=False,
        yes=True,
        override_cluster_name=None,
    )

    logger.info("[2/4] Syncing the repo")
    if "repo" in project_definition:
        # HACK: Skip git clone if exists so that this command can be idempotent
        # More advanced repo update behavior can be found at
        # https://github.com/jupyterhub/nbgitpuller/blob/master/nbgitpuller/pull.py
        session_exec_cluster(
            cluster_yaml,
            "git clone {repo} {directory} || true".format(
                repo=project_definition["repo"],
                directory=project_definition["name"]),
        )
    else:
        # "|| true" keeps the step from failing when mkdir itself fails.
        session_exec_cluster(
            cluster_yaml, "mkdir {directory} || true".format(
                directory=project_definition["name"]))

    logger.info("[3/4] Setting up environment")
    _setup_environment(cluster_yaml,
                       project_definition["environment"],
                       cwd=working_directory)

    logger.info("[4/4] Running command")
    logger.debug("Running {}".format(command))
    session_exec_cluster(cluster_yaml, command_to_run, cwd=working_directory)
Exemplo n.º 8
0
def start(command, args, shell):
    """Start a session: create the cluster, sync the project, set up, and run.

    Args:
        command: With ``shell`` truthy, the literal command to run.
            Otherwise the name of a project command; when falsy, the
            command named "default" is looked up instead.
        args: Passed to ``_get_command_to_run`` when ``shell`` is falsy.
        shell: When truthy, ``command`` is run as-is without a lookup.

    Raises:
        click.ClickException: If the project environment contains a
            "dockerfile" or "dockerimage" entry, which is not supported.
    """
    project_definition = load_project_or_throw()

    if shell:
        command_to_run = command
    else:
        command_to_run = _get_command_to_run(command or "default",
                                             project_definition, args)

    # Check for features we don't support right now
    project_environment = project_definition["environment"]
    need_docker = ("dockerfile" in project_environment
                   or "dockerimage" in project_environment)
    if need_docker:
        # Each fragment ends with a space: adjacent literals are joined
        # verbatim, and a missing space would glue "at" onto the URL.
        raise click.ClickException(
            "Docker support in session is currently not implemented. "
            "Please file a feature request at "
            "https://github.com/ray-project/ray/issues")

    cluster_yaml = project_definition["cluster"]
    working_directory = project_definition["name"]

    logger.info("[1/4] Creating cluster")
    create_or_update_cluster(
        config_file=cluster_yaml,
        override_min_workers=None,
        override_max_workers=None,
        no_restart=False,
        restart_only=False,
        yes=True,
        override_cluster_name=None,
    )

    logger.info("[2/4] Syncing the project")
    project_root = ray.projects.find_root(os.getcwd())
    # This is so that rsync syncs directly to the target directory, instead of
    # nesting inside the target directory.
    if not project_root.endswith("/"):
        project_root += "/"
    rsync(
        cluster_yaml,
        source=project_root,
        target="~/{}/".format(working_directory),
        override_cluster_name=None,
        down=False,
    )

    logger.info("[3/4] Setting up environment")
    _setup_environment(
        cluster_yaml, project_definition["environment"], cwd=working_directory)

    logger.info("[4/4] Running command")
    logger.debug("Running {}".format(command))
    session_exec_cluster(cluster_yaml, command_to_run, cwd=working_directory)
Exemplo n.º 9
0
 def create_cluster(self):
     """Create a cluster that will run the session.

     Calls ``create_or_update_cluster`` with the cluster YAML reported by
     ``self.project_definition``, with confirmation pre-answered
     (``yes=True``) and no worker-count or cluster-name overrides.
     """
     cluster_config = self.project_definition.cluster_yaml()
     create_or_update_cluster(
         config_file=cluster_config,
         override_min_workers=None,
         override_max_workers=None,
         no_restart=False,
         restart_only=False,
         yes=True,
         override_cluster_name=None,
     )
Exemplo n.º 10
0
def start(command, args, shell):
    """Start a session: create the cluster, sync the project, set up, and run.

    Args:
        command: With ``shell`` truthy, the literal command to run.
            Otherwise it is passed to
            ``project_definition.get_command_to_run`` to be resolved.
        args: Passed to ``get_command_to_run`` when ``shell`` is falsy.
        shell: When truthy, ``command`` is run as-is without a lookup.

    Raises:
        click.ClickException: If resolving the command raises
            ``ValueError``, or if the project environment contains a
            "dockerfile" or "dockerimage" entry, which is not supported.
    """
    project_definition = load_project_or_throw()

    if shell:
        command_to_run = command
    else:
        try:
            command_to_run = project_definition.get_command_to_run(
                command=command, args=args)
        except ValueError as e:
            # ClickException expects a message string, not an exception.
            raise click.ClickException(str(e))

    # Check for features we don't support right now
    project_environment = project_definition.config["environment"]
    need_docker = ("dockerfile" in project_environment
                   or "dockerimage" in project_environment)
    if need_docker:
        # Each fragment ends with a space: adjacent literals are joined
        # verbatim, and a missing space would glue "at" onto the URL.
        raise click.ClickException(
            "Docker support in session is currently not implemented. "
            "Please file a feature request at "
            "https://github.com/ray-project/ray/issues")

    logger.info("[1/4] Creating cluster")
    create_or_update_cluster(
        config_file=project_definition.cluster_yaml(),
        override_min_workers=None,
        override_max_workers=None,
        no_restart=False,
        restart_only=False,
        yes=True,
        override_cluster_name=None,
    )

    logger.info("[2/4] Syncing the project")
    rsync(
        project_definition.cluster_yaml(),
        source=project_definition.root,
        target=project_definition.working_directory(),
        override_cluster_name=None,
        down=False,
    )

    logger.info("[3/4] Setting up environment")
    _setup_environment(project_definition.cluster_yaml(),
                       project_environment,
                       cwd=project_definition.working_directory())

    logger.info("[4/4] Running command")
    logger.debug("Running {}".format(command))
    session_exec_cluster(project_definition.cluster_yaml(),
                         command_to_run,
                         cwd=project_definition.working_directory())
Exemplo n.º 11
0
def submit(cluster_config_file, docker, screen, tmux, stop, start,
           cluster_name, port_forward, script, args, script_args):
    """Upload a script to the cluster and run it there with ``python``.

    The script is synced to the following location before it is run:

        os.path.join("~", os.path.basename(script))

    Script arguments come from ``script_args`` when it is non-empty, else
    from the deprecated ``args`` string (appended as one token) when it is
    not None. Each entry of ``port_forward`` is forwarded to the same port
    number, i.e. passed on as a ``(port, port)`` pair.

    Example:
        >>> ray submit [CLUSTER.YAML] experiment.py -- --smoke-test

    Raises:
        AssertionError: if both ``screen`` and ``tmux`` are truthy, or if
            both ``script_args`` and ``args`` are truthy.
    """
    assert not (screen and tmux), "Can specify only one of `screen` or `tmux`."
    assert not (script_args and args), "Use -- --arg1 --arg2 for script args."

    if args:
        logger.warning(
            "ray submit [yaml] [script.py] --args=... is deprecated and "
            "will be removed in a future version of Ray. Use "
            "`ray submit [yaml] script.py -- --arg1 --arg2` instead.")

    if start:
        create_or_update_cluster(
            cluster_config_file, None, None, False, False, True, cluster_name)

    remote_script = os.path.join("~", os.path.basename(script))
    rsync(cluster_config_file, script, remote_script, cluster_name, down=False)

    if script_args:
        extra_tokens = list(script_args)
    elif args is not None:
        extra_tokens = [args]
    else:
        extra_tokens = []

    port_pairs = [(port, port) for port in port_forward]
    remote_cmd = " ".join(["python", remote_script] + extra_tokens)
    exec_cluster(
        cluster_config_file,
        remote_cmd,
        docker,
        screen,
        tmux,
        stop,
        start=False,
        override_cluster_name=cluster_name,
        port_forward=port_pairs)
Exemplo n.º 12
0
def create_or_update(cluster_config_file, min_workers, max_workers, no_restart,
                     restart_only, yes, cluster_name):
    """Create or update a Ray cluster.

    If ``cluster_config_file`` is an ``http``/``https`` URL, it is first
    downloaded (5 second timeout) into the current working directory under
    the URL's last path segment, and that local file is used instead. If the
    download raises ``HTTPError``, the error is logged and the original
    value is passed on unchanged.

    Raises:
        AssertionError: if at least one of ``restart_only`` / ``no_restart``
            is truthy and the two compare equal (i.e. both were set).
    """
    if restart_only or no_restart:
        assert restart_only != no_restart, "Cannot set both 'restart_only' " \
            "and 'no_restart' at the same time!"
    if urllib.parse.urlparse(cluster_config_file).scheme in ("http", "https"):
        try:
            # Context manager so the HTTP response is always closed.
            with urllib.request.urlopen(
                    cluster_config_file, timeout=5) as response:
                content = response.read()
            file_name = cluster_config_file.split("/")[-1]
            with open(file_name, "wb") as f:
                f.write(content)
            cluster_config_file = file_name
        except urllib.error.HTTPError as e:
            # The logger %-formats its arguments, so a placeholder is
            # required; without one the record fails to format.
            logger.info("Error downloading file: %s", e)
    create_or_update_cluster(cluster_config_file, min_workers, max_workers,
                             no_restart, restart_only, yes, cluster_name)
Exemplo n.º 13
0
def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,
           port_forward, script, script_args):
    """Upload a script to the cluster and run it there with ``python``.

    The script is synced to the following location before it is run:

        os.path.join("~", os.path.basename(script))

    ``create_or_update_cluster`` is called first when ``start`` is truthy.
    The entries of ``script_args`` are appended to the command line,
    separated by single spaces.

    Raises:
        AssertionError: if both ``screen`` and ``tmux`` are truthy.
    """
    assert not (screen and tmux), "Can specify only one of `screen` or `tmux`."

    if start:
        create_or_update_cluster(
            cluster_config_file, None, None, False, False, True, cluster_name)

    remote_script = os.path.join("~", os.path.basename(script))
    rsync(cluster_config_file, script, remote_script, cluster_name, down=False)

    command_tokens = ["python", remote_script]
    command_tokens = command_tokens + list(script_args)
    exec_cluster(
        cluster_config_file, " ".join(command_tokens), screen, tmux, stop,
        False, cluster_name, port_forward)
Exemplo n.º 14
0
def create_or_update(cluster_config_file, min_workers, max_workers, no_restart,
                     yes):
    """Forward all arguments, in order, to ``create_or_update_cluster``."""
    create_or_update_cluster(
        cluster_config_file,
        min_workers,
        max_workers,
        no_restart,
        yes)
Exemplo n.º 15
0
def create_or_update(cluster_config_file, min_workers, max_workers, sync_only):
    """Forward all arguments, in order, to ``create_or_update_cluster``."""
    create_or_update_cluster(
        cluster_config_file,
        min_workers,
        max_workers,
        sync_only)
Exemplo n.º 16
0
def create_or_update(
        cluster_config_file, min_workers, max_workers, no_restart, yes):
    """Forward all arguments, in order, to ``create_or_update_cluster``."""
    create_or_update_cluster(
        cluster_config_file,
        min_workers,
        max_workers,
        no_restart,
        yes)