def submit(cluster_config_file, docker, screen, tmux, stop, start,
           cluster_name, port_forward, script, args):
    """Uploads and runs a script on the specified cluster.

    The script is automatically synced to the following location:

        os.path.join("~", os.path.basename(script))

    Example:
        >>> ray submit [CLUSTER.YAML] experiment.py --args="--smoke-test"

    Args:
        cluster_config_file: Forwarded to ``create_or_update_cluster``,
            ``rsync`` and ``exec_cluster``.
        docker: Forwarded unchanged to ``exec_cluster``.
        screen: Forwarded to ``exec_cluster``; cannot be combined with
            ``tmux``.
        tmux: Forwarded to ``exec_cluster``; cannot be combined with
            ``screen``.
        stop: Forwarded unchanged to ``exec_cluster``.
        start: When truthy, ``create_or_update_cluster`` runs before the
            script is uploaded.
        cluster_name: Forwarded to all three cluster helpers.
        port_forward: Forwarded unchanged to ``exec_cluster``.
        script: Local path of the script to upload and run.
        args: Optional single string appended to the remote command line;
            skipped only when it is ``None``.

    Raises:
        AssertionError: If both ``screen`` and ``tmux`` are set.
    """
    both_multiplexers = screen and tmux
    assert not both_multiplexers, \
        "Can specify only one of `screen` or `tmux`."

    if start:
        # Positional flags are presumably (min_workers, max_workers,
        # no_restart, restart_only, yes) -- confirm against the
        # create_or_update_cluster signature.
        create_or_update_cluster(
            cluster_config_file, None, None, False, False, True,
            cluster_name)

    # The script lands in the remote home directory under its own basename.
    remote_script = os.path.join("~", os.path.basename(script))
    rsync(cluster_config_file, script, remote_script, cluster_name,
          down=False)

    extra_parts = [] if args is None else [args]
    remote_command = " ".join(["python", remote_script] + extra_parts)

    exec_cluster(
        cluster_config_file, remote_command, docker, screen, tmux, stop,
        False, cluster_name, port_forward)
def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,
           port_forward, script, script_args):
    """Uploads and runs a script on the specified cluster.

    The script is automatically synced to the following location:

        os.path.join("~", os.path.basename(script))

    When ``tmux`` or ``screen`` is set, a hint with the matching
    ``ray attach`` command is logged after the command has been submitted.

    Args:
        cluster_config_file: Forwarded to ``create_or_update_cluster``,
            ``rsync`` and ``exec_cluster``; also echoed in the attach hint.
        screen: Forwarded to ``exec_cluster``; cannot be combined with
            ``tmux``.
        tmux: Forwarded to ``exec_cluster``; cannot be combined with
            ``screen``.
        stop: Forwarded unchanged to ``exec_cluster``.
        start: When truthy, ``create_or_update_cluster`` runs before the
            script is uploaded.
        cluster_name: Forwarded to the cluster helpers; when not ``None``
            it is added to the attach hint as ``--cluster-name=...``.
        port_forward: Forwarded unchanged to ``exec_cluster``.
        script: Local path of the script to upload and run.
        script_args: Iterable of strings appended to the remote command.

    Raises:
        AssertionError: If both ``screen`` and ``tmux`` are set.
    """
    both_multiplexers = screen and tmux
    assert not both_multiplexers, \
        "Can specify only one of `screen` or `tmux`."

    if start:
        # Positional flags are presumably (min_workers, max_workers,
        # no_restart, restart_only, yes) -- confirm against the
        # create_or_update_cluster signature.
        create_or_update_cluster(
            cluster_config_file, None, None, False, False, True,
            cluster_name)

    remote_script = os.path.join("~", os.path.basename(script))
    rsync(cluster_config_file, script, remote_script, cluster_name,
          down=False)

    remote_command = " ".join(["python", remote_script] + list(script_args))
    exec_cluster(cluster_config_file, remote_command, screen, tmux, stop,
                 False, cluster_name, port_forward)

    # Nothing to attach to unless the command ran inside a multiplexer.
    if not (tmux or screen):
        return

    hint_parts = ["ray attach", cluster_config_file]
    if cluster_name is not None:
        hint_parts.append("--cluster-name={}".format(cluster_name))
    # At least one of the two is truthy here, and tmux takes precedence.
    hint_parts.append("--tmux" if tmux else "--screen")

    logger.info("Use `{}` to check on command status.".format(
        " ".join(hint_parts)))
def __do_spawn(self):
    """Create or update the cluster for ``self.config_file`` and record the outcome.

    On success the config file is re-read into ``self.config`` and
    ``self.ready`` is set to ``True``.  On any failure (``BaseException``
    included) a ``CannotSpawnCluster`` carrying the cause and the formatted
    traceback is stored on ``self.spawner.exc``; the traceback is also
    written to stderr unless ``self.spawner.silent`` is truthy.  The
    exception is not re-raised.
    """
    spawn_options = dict(
        override_min_workers=None,
        override_max_workers=None,
        no_restart=False,
        restart_only=False,
        yes=True,
        override_cluster_name=None,
        no_config_cache=False,
        log_old_style=False,
        log_color="auto",
        verbose=1,
    )
    try:
        create_or_update_cluster(self.config_file, **spawn_options)
        # need to re-load the config, as create_or_update_cluster()
        # modifies it
        with open(self.config_file) as config_stream:
            raw_config = config_stream.read()
        self.config = yaml.safe_load(raw_config)
        self.ready = True
    except BaseException as ex:
        # Format once and reuse for both the stored error and stderr.
        formatted_tb = traceback.format_exc()
        self.spawner.exc = CannotSpawnCluster(
            "Cannot spawn cluster", cause=ex, traceback=formatted_tb)
        if self.spawner.silent:
            return
        sys.stderr.write(
            "Cannot spawn cluster:\n{}\n".format(formatted_tb))
def create_or_update(cluster_config_file, min_workers, max_workers,
                     no_restart, restart_only, yes, cluster_name):
    """Validate the restart flags, then delegate to ``create_or_update_cluster``.

    All arguments are forwarded positionally and unchanged.

    Raises:
        AssertionError: If ``restart_only`` and ``no_restart`` are both set
            (more precisely: one is truthy and the two compare equal).
    """
    if restart_only or no_restart:
        flags_differ = restart_only != no_restart
        assert flags_differ, (
            "Cannot set both 'restart_only' "
            "and 'no_restart' at the same time!")

    create_or_update_cluster(
        cluster_config_file,
        min_workers,
        max_workers,
        no_restart,
        restart_only,
        yes,
        cluster_name,
    )
def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,
           port_forward, script, script_args):
    """Uploads and runs a script on the specified cluster.

    The script is automatically synced to the following location:

        os.path.join("~", os.path.basename(script))

    If the command is run under ``tmux`` or ``screen``, the matching
    ``ray attach`` invocation is logged afterwards.

    Args:
        cluster_config_file: Forwarded to ``create_or_update_cluster``,
            ``rsync`` and ``exec_cluster``; also echoed in the attach hint.
        screen: Forwarded to ``exec_cluster``; cannot be combined with
            ``tmux``.
        tmux: Forwarded to ``exec_cluster``; cannot be combined with
            ``screen``.
        stop: Forwarded unchanged to ``exec_cluster``.
        start: When truthy, ``create_or_update_cluster`` runs before the
            script is uploaded.
        cluster_name: Forwarded to the cluster helpers; when not ``None``
            it is added to the attach hint as ``--cluster-name=...``.
        port_forward: Forwarded unchanged to ``exec_cluster``.
        script: Local path of the script to upload and run.
        script_args: Iterable of strings appended to the remote command.

    Raises:
        AssertionError: If both ``screen`` and ``tmux`` are set.
    """
    assert not (screen and tmux), (
        "Can specify only one of `screen` or `tmux`.")

    if start:
        # Positional flags are presumably (min_workers, max_workers,
        # no_restart, restart_only, yes) -- confirm against the
        # create_or_update_cluster signature.
        bring_up_args = (cluster_config_file, None, None, False, False,
                         True, cluster_name)
        create_or_update_cluster(*bring_up_args)

    destination = os.path.join("~", os.path.basename(script))
    rsync(cluster_config_file, script, destination, cluster_name,
          down=False)

    argv = ["python", destination]
    argv.extend(script_args)
    exec_cluster(cluster_config_file, " ".join(argv), screen, tmux, stop,
                 False, cluster_name, port_forward)

    if tmux or screen:
        attach_argv = ["ray attach", cluster_config_file]
        if cluster_name is not None:
            attach_argv.append("--cluster-name={}".format(cluster_name))
        # tmux wins when both checks could apply; otherwise screen is set.
        attach_argv.append("--tmux" if tmux else "--screen")
        attach_line = " ".join(attach_argv)
        logger.info(
            "Use `{}` to check on command status.".format(attach_line))
def create_or_update(cluster_config_file, min_workers, max_workers,
                     no_restart, restart_only, yes, cluster_name):
    """Create or update a Ray cluster.

    Checks that the two restart flags are not both set, then forwards every
    argument positionally to ``create_or_update_cluster``.

    Raises:
        AssertionError: If ``restart_only`` and ``no_restart`` are both set
            (more precisely: one is truthy and the two compare equal).
    """
    any_restart_flag = restart_only or no_restart
    if any_restart_flag:
        assert restart_only != no_restart, (
            "Cannot set both 'restart_only' "
            "and 'no_restart' at the same time!")

    forwarded = (
        cluster_config_file,
        min_workers,
        max_workers,
        no_restart,
        restart_only,
        yes,
        cluster_name,
    )
    create_or_update_cluster(*forwarded)
def start(command, args):
    """Start a session: create the cluster, sync the repo, set up and run.

    Steps, in order:
      1. Create or update the cluster named by the project's ``cluster`` key.
      2. ``git clone`` the project's ``repo`` into a directory named after
         the project, or ``mkdir`` that directory when no repo is set.
      3. Run ``_setup_environment`` for the project environment.
      4. Run the resolved command inside the project directory.

    Args:
        command: Name of the project command to run; the ``"default"``
            command is used when this is falsy.
        args: Forwarded to ``_get_command_to_run``.

    Raises:
        click.ClickException: If the project environment declares a
            ``dockerfile`` or ``dockerimage`` (not supported here).
    """
    project_definition = load_project_or_throw()

    # Fall back to the project's "default" command when none was named.
    command_to_run = _get_command_to_run(
        command if command else "default", project_definition, args)

    # Check for features we don't support right now
    project_environment = project_definition["environment"]
    need_docker = ("dockerfile" in project_environment
                   or "dockerimage" in project_environment)
    if need_docker:
        # The space after "at" matters: without it the URL is glued to the
        # preceding word in the rendered message.
        raise click.ClickException(
            "Docker support in session is currently not implemented. "
            "Please file a feature request at "
            "https://github.com/ray-project/ray/issues")

    cluster_yaml = project_definition["cluster"]
    working_directory = project_definition["name"]

    logger.info("[1/4] Creating cluster")
    create_or_update_cluster(
        config_file=cluster_yaml,
        override_min_workers=None,
        override_max_workers=None,
        no_restart=False,
        restart_only=False,
        yes=True,
        override_cluster_name=None,
    )

    logger.info("[2/4] Syncing the repo")
    if "repo" in project_definition:
        # HACK: Skip git clone if the directory exists so that this command
        # can be idempotent.
        # More advanced repo update behavior can be found at
        # https://github.com/jupyterhub/nbgitpuller/blob/master/nbgitpuller/pull.py
        session_exec_cluster(
            cluster_yaml,
            "git clone {repo} {directory} || true".format(
                repo=project_definition["repo"],
                directory=working_directory),
        )
    else:
        session_exec_cluster(
            cluster_yaml,
            "mkdir {directory} || true".format(directory=working_directory))

    logger.info("[3/4] Setting up environment")
    _setup_environment(
        cluster_yaml, project_environment, cwd=working_directory)

    logger.info("[4/4] Running command")
    logger.debug("Running {}".format(command))
    session_exec_cluster(cluster_yaml, command_to_run, cwd=working_directory)
def start(command, args, shell):
    """Start a session: create the cluster, sync the project, set up and run.

    Steps, in order:
      1. Create or update the cluster named by the project's ``cluster`` key.
      2. ``rsync`` the local project root to ``~/<project name>/``.
      3. Run ``_setup_environment`` for the project environment.
      4. Run the resolved command inside the project directory.

    Args:
        command: With ``shell`` set, the literal command to run; otherwise
            the name of a project command (``"default"`` when falsy).
        args: Forwarded to ``_get_command_to_run`` (unused with ``shell``).
        shell: When truthy, ``command`` is run as-is without lookup.

    Raises:
        click.ClickException: If the project environment declares a
            ``dockerfile`` or ``dockerimage`` (not supported here).
    """
    project_definition = load_project_or_throw()

    if shell:
        command_to_run = command
    else:
        # Fall back to the project's "default" command when none was named.
        command_to_run = _get_command_to_run(
            command if command else "default", project_definition, args)

    # Check for features we don't support right now
    project_environment = project_definition["environment"]
    need_docker = ("dockerfile" in project_environment
                   or "dockerimage" in project_environment)
    if need_docker:
        # The space after "at" matters: without it the URL is glued to the
        # preceding word in the rendered message.
        raise click.ClickException(
            "Docker support in session is currently not implemented. "
            "Please file a feature request at "
            "https://github.com/ray-project/ray/issues")

    cluster_yaml = project_definition["cluster"]
    working_directory = project_definition["name"]

    logger.info("[1/4] Creating cluster")
    create_or_update_cluster(
        config_file=cluster_yaml,
        override_min_workers=None,
        override_max_workers=None,
        no_restart=False,
        restart_only=False,
        yes=True,
        override_cluster_name=None,
    )

    logger.info("[2/4] Syncing the project")
    project_root = ray.projects.find_root(os.getcwd())
    # This is so that rsync syncs directly to the target directory, instead
    # of nesting inside the target directory.
    if not project_root.endswith("/"):
        project_root += "/"
    rsync(
        cluster_yaml,
        source=project_root,
        target="~/{}/".format(working_directory),
        override_cluster_name=None,
        down=False,
    )

    logger.info("[3/4] Setting up environment")
    _setup_environment(
        cluster_yaml, project_environment, cwd=working_directory)

    logger.info("[4/4] Running command")
    logger.debug("Running {}".format(command))
    session_exec_cluster(cluster_yaml, command_to_run, cwd=working_directory)
def create_cluster(self):
    """Create (or update) the cluster that will run the session.

    The cluster config comes from ``self.project_definition.cluster_yaml()``;
    every override is left unset and ``yes=True`` is passed.
    """
    cluster_yaml = self.project_definition.cluster_yaml()
    fixed_options = dict(
        override_min_workers=None,
        override_max_workers=None,
        no_restart=False,
        restart_only=False,
        yes=True,
        override_cluster_name=None,
    )
    create_or_update_cluster(config_file=cluster_yaml, **fixed_options)
def start(command, args, shell):
    """Start a session: create the cluster, sync the project, set up and run.

    Steps, in order:
      1. Create or update the cluster from ``project_definition.cluster_yaml()``.
      2. ``rsync`` ``project_definition.root`` to
         ``project_definition.working_directory()``.
      3. Run ``_setup_environment`` for the project environment.
      4. Run the resolved command inside the working directory.

    Args:
        command: With ``shell`` set, the literal command to run; otherwise
            passed to ``project_definition.get_command_to_run``.
        args: Forwarded to ``get_command_to_run`` (unused with ``shell``).
        shell: When truthy, ``command`` is run as-is without lookup.

    Raises:
        click.ClickException: If command lookup raises ``ValueError``, or
            if the project environment declares a ``dockerfile`` or
            ``dockerimage`` (not supported here).
    """
    project_definition = load_project_or_throw()

    if shell:
        command_to_run = command
    else:
        try:
            command_to_run = project_definition.get_command_to_run(
                command=command, args=args)
        except ValueError as e:
            # ClickException takes a message string, not an exception.
            raise click.ClickException(str(e))

    # Check for features we don't support right now
    project_environment = project_definition.config["environment"]
    need_docker = ("dockerfile" in project_environment
                   or "dockerimage" in project_environment)
    if need_docker:
        # The space after "at" matters: without it the URL is glued to the
        # preceding word in the rendered message.
        raise click.ClickException(
            "Docker support in session is currently not implemented. "
            "Please file a feature request at "
            "https://github.com/ray-project/ray/issues")

    logger.info("[1/4] Creating cluster")
    create_or_update_cluster(
        config_file=project_definition.cluster_yaml(),
        override_min_workers=None,
        override_max_workers=None,
        no_restart=False,
        restart_only=False,
        yes=True,
        override_cluster_name=None,
    )

    logger.info("[2/4] Syncing the project")
    rsync(
        project_definition.cluster_yaml(),
        source=project_definition.root,
        target=project_definition.working_directory(),
        override_cluster_name=None,
        down=False,
    )

    logger.info("[3/4] Setting up environment")
    _setup_environment(
        project_definition.cluster_yaml(),
        project_environment,
        cwd=project_definition.working_directory())

    logger.info("[4/4] Running command")
    logger.debug("Running {}".format(command))
    session_exec_cluster(
        project_definition.cluster_yaml(),
        command_to_run,
        cwd=project_definition.working_directory())
def submit(cluster_config_file, docker, screen, tmux, stop, start,
           cluster_name, port_forward, script, args, script_args):
    """Uploads and runs a script on the specified cluster.

    The script is automatically synced to the following location:

        os.path.join("~", os.path.basename(script))

    Example:
        >>> ray submit [CLUSTER.YAML] experiment.py -- --smoke-test

    Args:
        cluster_config_file: Forwarded to ``create_or_update_cluster``,
            ``rsync`` and ``exec_cluster``.
        docker: Forwarded unchanged to ``exec_cluster``.
        screen: Forwarded to ``exec_cluster``; cannot be combined with
            ``tmux``.
        tmux: Forwarded to ``exec_cluster``; cannot be combined with
            ``screen``.
        stop: Forwarded unchanged to ``exec_cluster``.
        start: When truthy, ``create_or_update_cluster`` runs before the
            script is uploaded.
        cluster_name: Forwarded to all three cluster helpers.
        port_forward: Iterable of ports; each ``port`` is forwarded to
            ``exec_cluster`` as the pair ``(port, port)``.
        script: Local path of the script to upload and run.
        args: Deprecated single-string form of the script arguments; a
            warning is logged when it is truthy.
        script_args: Iterable of strings appended to the remote command;
            takes precedence over ``args``.

    Raises:
        AssertionError: If both ``screen`` and ``tmux`` are set, or if both
            ``script_args`` and ``args`` are given.
    """
    both_multiplexers = screen and tmux
    assert not both_multiplexers, \
        "Can specify only one of `screen` or `tmux`."
    both_arg_styles = script_args and args
    assert not both_arg_styles, "Use -- --arg1 --arg2 for script args."

    if args:
        logger.warning(
            "ray submit [yaml] [script.py] --args=... is deprecated and "
            "will be removed in a future version of Ray. Use "
            "`ray submit [yaml] script.py -- --arg1 --arg2` instead.")

    if start:
        # Positional flags are presumably (min_workers, max_workers,
        # no_restart, restart_only, yes) -- confirm against the
        # create_or_update_cluster signature.
        create_or_update_cluster(
            cluster_config_file, None, None, False, False, True,
            cluster_name)

    remote_script = os.path.join("~", os.path.basename(script))
    rsync(cluster_config_file, script, remote_script, cluster_name,
          down=False)

    # New-style script_args win; the legacy string is used only when it is
    # not None (an empty string is still appended).
    if script_args:
        trailing = list(script_args)
    elif args is not None:
        trailing = [args]
    else:
        trailing = []

    # Each port maps to the same port number on the other side.
    port_pairs = [(port, port) for port in port_forward]
    remote_command = " ".join(["python", remote_script] + trailing)

    exec_cluster(
        cluster_config_file,
        remote_command,
        docker,
        screen,
        tmux,
        stop,
        start=False,
        override_cluster_name=cluster_name,
        port_forward=port_pairs)
def create_or_update(cluster_config_file, min_workers, max_workers,
                     no_restart, restart_only, yes, cluster_name):
    """Create or update a Ray cluster.

    When ``cluster_config_file`` is an ``http``/``https`` URL, the file is
    first downloaded into the current working directory (named after the
    last path segment of the URL) and that local copy is used instead.

    Args:
        cluster_config_file: Local path or http(s) URL of the cluster config.
        min_workers: Forwarded unchanged to ``create_or_update_cluster``.
        max_workers: Forwarded unchanged to ``create_or_update_cluster``.
        no_restart: Forwarded unchanged; cannot be combined with
            ``restart_only``.
        restart_only: Forwarded unchanged; cannot be combined with
            ``no_restart``.
        yes: Forwarded unchanged to ``create_or_update_cluster``.
        cluster_name: Forwarded unchanged to ``create_or_update_cluster``.

    Raises:
        AssertionError: If ``restart_only`` and ``no_restart`` are both set.
    """
    if restart_only or no_restart:
        assert restart_only != no_restart, "Cannot set both 'restart_only' " \
            "and 'no_restart' at the same time!"

    if urllib.parse.urlparse(cluster_config_file).scheme in ("http", "https"):
        try:
            # Close the HTTP response deterministically once it is read.
            with urllib.request.urlopen(
                    cluster_config_file, timeout=5) as response:
                content = response.read()
        except urllib.error.HTTPError as e:
            # Best effort: log and fall through with the original value.
            # The exception needs a %s placeholder, otherwise logging
            # fails to format the record.
            logger.info("Error downloading file: %s", e)
        else:
            file_name = cluster_config_file.split("/")[-1]
            with open(file_name, "wb") as f:
                f.write(content)
            cluster_config_file = file_name

    create_or_update_cluster(cluster_config_file, min_workers, max_workers,
                             no_restart, restart_only, yes, cluster_name)
def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,
           port_forward, script, script_args):
    """Uploads and runs a script on the specified cluster.

    The script is automatically synced to the following location:

        os.path.join("~", os.path.basename(script))

    Args:
        cluster_config_file: Forwarded to ``create_or_update_cluster``,
            ``rsync`` and ``exec_cluster``.
        screen: Forwarded to ``exec_cluster``; cannot be combined with
            ``tmux``.
        tmux: Forwarded to ``exec_cluster``; cannot be combined with
            ``screen``.
        stop: Forwarded unchanged to ``exec_cluster``.
        start: When truthy, ``create_or_update_cluster`` runs before the
            script is uploaded.
        cluster_name: Forwarded to all three cluster helpers.
        port_forward: Forwarded unchanged to ``exec_cluster``.
        script: Local path of the script to upload and run.
        script_args: Iterable of strings appended to the remote command.

    Raises:
        AssertionError: If both ``screen`` and ``tmux`` are set.
    """
    both_multiplexers = screen and tmux
    assert not both_multiplexers, \
        "Can specify only one of `screen` or `tmux`."

    if start:
        # Positional flags are presumably (min_workers, max_workers,
        # no_restart, restart_only, yes) -- confirm against the
        # create_or_update_cluster signature.
        create_or_update_cluster(
            cluster_config_file, None, None, False, False, True,
            cluster_name)

    # The script lands in the remote home directory under its own basename.
    remote_script = os.path.join("~", os.path.basename(script))
    rsync(cluster_config_file, script, remote_script, cluster_name,
          down=False)

    argv = ["python", remote_script]
    argv.extend(script_args)
    remote_command = " ".join(argv)

    exec_cluster(
        cluster_config_file,
        remote_command,
        screen,
        tmux,
        stop,
        False,
        cluster_name,
        port_forward,
    )
def create_or_update(cluster_config_file, min_workers, max_workers,
                     no_restart, yes):
    """Forward all arguments, unchanged and in order, to ``create_or_update_cluster``."""
    create_or_update_cluster(
        cluster_config_file,
        min_workers,
        max_workers,
        no_restart,
        yes,
    )
def create_or_update(cluster_config_file, min_workers, max_workers,
                     sync_only):
    """Forward all arguments, unchanged and in order, to ``create_or_update_cluster``."""
    create_or_update_cluster(
        cluster_config_file,
        min_workers,
        max_workers,
        sync_only,
    )
def create_or_update(cluster_config_file, min_workers, max_workers,
                     no_restart, yes):
    """Delegate to ``create_or_update_cluster`` with the arguments as given."""
    forwarded = (
        cluster_config_file,
        min_workers,
        max_workers,
        no_restart,
        yes,
    )
    create_or_update_cluster(*forwarded)