def sync_to_worker_if_possible(self):
    """Push the driver's local log directory to the worker with rsync.

    Requires the ray cluster to have been started with the autoscaler
    (so an SSH key and user are available) and requires rsync to be
    installed. When either requirement is missing an error is logged and
    nothing is synced. The call blocks until the rsync process exits.
    """
    global _log_sync_warned

    if self.worker_ip == self.local_ip:
        # Worker and driver are the same node; there is nothing to copy.
        return

    key_path = get_ssh_key()
    user = get_ssh_user()
    if key_path is None or user is None:
        # Only report the missing cluster setup while the shared flag is
        # still unset, then set it.
        if not _log_sync_warned:
            logger.error("Log sync requires cluster to be setup with "
                         "`ray up`.")
            _log_sync_warned = True
        return

    if not distutils.spawn.find_executable("rsync"):
        logger.error("Log sync requires rsync to be installed.")
        return

    local_side = "{}/".format(self.local_dir)
    worker_side = "{}@{}:{}/".format(user, self.worker_ip, self.local_dir)

    # The ssh invocation is handed to rsync as its remote shell (-e).
    remote_shell = (
        "ssh -i {} -o ConnectTimeout=120s -o StrictHostKeyChecking=no"
    ).format(quote(key_path))
    rsync_cmd = 'rsync -savz -e "{}" {} {}'.format(
        remote_shell, quote(local_side), quote(worker_side))

    logger.info("Syncing results to %s", str(self.worker_ip))
    # rsync's stdout goes to this object's logfile; wait for completion.
    subprocess.Popen(rsync_cmd, shell=True, stdout=self.logfile).wait()
def sync_to_worker_if_possible(self):
    """Copy the local logdir on the driver over to the worker, if possible.

    The sync is skipped when the worker is the local node. It needs the
    ray cluster to have been started with the autoscaler and needs rsync
    to be installed; if either is missing an error is logged instead.
    Returns None in every case, after the rsync process (if any) exits.
    """
    global _log_sync_warned

    same_node = self.worker_ip == self.local_ip
    if same_node:
        return

    ssh_key = get_ssh_key()
    ssh_user = get_ssh_user()
    credentials_missing = ssh_key is None or ssh_user is None
    if credentials_missing:
        if _log_sync_warned:
            # The setup problem has already been reported.
            return
        logger.error("Log sync requires cluster to be setup with "
                     "`ray up`.")
        _log_sync_warned = True
        return

    rsync_path = distutils.spawn.find_executable("rsync")
    if not rsync_path:
        logger.error("Log sync requires rsync to be installed.")
        return

    source = "{}/".format(self.local_dir)
    target = "{}@{}:{}/".format(ssh_user, self.worker_ip, self.local_dir)

    # Assemble the shell command piece by piece; every interpolated value
    # is shell-quoted before it is inserted.
    pieces = [
        "rsync -savz -e",
        '"ssh -i {} -o ConnectTimeout=120s -o StrictHostKeyChecking=no"'
        .format(quote(ssh_key)),
        quote(source),
        quote(target),
    ]
    final_cmd = " ".join(pieces)

    logger.info("Syncing results to %s", str(self.worker_ip))
    sync_process = subprocess.Popen(
        final_cmd,
        shell=True,
        stdout=self.logfile,
    )
    # Block until the transfer has finished.
    sync_process.wait()
def sync_now(self, force=False):
    """Kick off a log sync: worker -> local via rsync, then local -> remote.

    Args:
        force: When a previously started sync process is still running,
            kill it and continue if True; otherwise log a warning and
            skip this sync.

    The sync time is recorded first. The rsync pull is only built when
    the worker is a different node, and needs an SSH key/user and an
    rsync executable (an error is logged and the call returns if they
    are missing). If ``remote_dir`` is set, ``sync_func`` is called
    directly when present, otherwise the command from
    ``get_remote_sync_cmd()`` is chained after the pull with ``&&``. The
    combined command is started with ``subprocess.Popen`` and not waited
    on.
    """
    self.last_sync_time = time.time()

    if not self.worker_ip:
        logger.debug("Worker ip unknown, skipping log sync for {}".format(
            self.local_dir))
        return

    # Step 1: command that pulls the worker's logdir to this node.
    pull_cmd = None
    on_same_node = self.worker_ip == self.local_ip
    if not on_same_node:
        key_path = get_ssh_key()
        user = get_ssh_user()
        if key_path is None or user is None:
            logger.error("Log sync requires cluster to be setup with "
                         "`ray create_or_update`.")
            return
        if not distutils.spawn.find_executable("rsync"):
            logger.error("Log sync requires rsync to be installed.")
            return
        worker_side = "{}@{}:{}/".format(
            user, self.worker_ip, self.local_dir)
        local_side = "{}/".format(self.local_dir)
        remote_shell = (
            "ssh -i {} -o ConnectTimeout=120s -o StrictHostKeyChecking=no"
        ).format(quote(key_path))
        pull_cmd = 'rsync -savz -e "{}" {} {}'.format(
            remote_shell, quote(worker_side), quote(local_side))

    # Step 2: upload to the remote dir, either through the user-supplied
    # function (run right here) or through a shell command.
    push_cmd = None
    if self.remote_dir:
        if self.sync_func:
            try:
                self.sync_func(self.local_dir, self.remote_dir)
            except Exception:
                logger.exception("Sync function failed.")
        else:
            push_cmd = self.get_remote_sync_cmd()

    # Step 3: deal with a sync that may still be running.
    previous = self.sync_process
    if previous:
        previous.poll()
        if previous.returncode is None:
            if not force:
                logger.warning("Last sync is still in progress, skipping.")
                return
            previous.kill()

    # Step 4: chain whatever commands exist and launch them.
    pending = [cmd for cmd in (pull_cmd, push_cmd) if cmd]
    if pending:
        final_cmd = " && ".join(pending)
        logger.debug("Running log sync: {}".format(final_cmd))
        self.sync_process = subprocess.Popen(final_cmd, shell=True)
def sync_now(self, force=False):
    """Synchronise this log directory and record when that happened.

    Up to two shell steps are collected and run as one ``&&``-chained
    background process:

    * an rsync pull from the worker, when the worker is not the local
      node (needs an SSH key, an SSH user and rsync);
    * the command from ``get_remote_sync_cmd()``, when ``remote_dir`` is
      set and no ``sync_func`` is configured.

    When ``sync_func`` is configured it is invoked directly instead, and
    any exception it raises is logged rather than propagated.

    Args:
        force: If True, a still-running previous sync process is killed;
            if False, the call logs a warning and returns.
    """
    self.last_sync_time = time.time()

    worker_ip = self.worker_ip
    if not worker_ip:
        logger.debug("Worker ip unknown, skipping log sync for {}".format(
            self.local_dir))
        return

    if worker_ip == self.local_ip:
        # Logs are already on this node; don't need to rsync.
        fetch_cmd = None
    else:
        ssh_key = get_ssh_key()
        ssh_user = get_ssh_user()
        if ssh_key is None or ssh_user is None:
            logger.error("Log sync requires cluster to be setup with "
                         "`ray create_or_update`.")
            return
        if not distutils.spawn.find_executable("rsync"):
            logger.error("Log sync requires rsync to be installed.")
            return
        fetch_parts = (
            "rsync -savz -e",
            '"ssh -i {} -o ConnectTimeout=120s -o StrictHostKeyChecking=no"'
            .format(quote(ssh_key)),
            quote("{}@{}:{}/".format(ssh_user, worker_ip, self.local_dir)),
            quote("{}/".format(self.local_dir)),
        )
        fetch_cmd = " ".join(fetch_parts)

    upload_cmd = None
    if self.remote_dir and self.sync_func:
        # A custom sync function replaces the shell upload step.
        try:
            self.sync_func(self.local_dir, self.remote_dir)
        except Exception:
            logger.exception("Sync function failed.")
    elif self.remote_dir:
        upload_cmd = self.get_remote_sync_cmd()

    if self.sync_process:
        self.sync_process.poll()
        still_running = self.sync_process.returncode is None
        if still_running and force:
            self.sync_process.kill()
        elif still_running:
            logger.warning("Last sync is still in progress, skipping.")
            return

    steps = []
    if fetch_cmd:
        steps.append(fetch_cmd)
    if upload_cmd:
        steps.append(upload_cmd)
    if not steps:
        return

    final_cmd = " && ".join(steps)
    logger.debug("Running log sync: {}".format(final_cmd))
    self.sync_process = subprocess.Popen(final_cmd, shell=True)
def sync_now(self, force=False):
    """Start a background sync of this log directory.

    Two optional shell steps are chained with ``&&`` and launched through
    ``subprocess.Popen`` without waiting for them:

    1. an rsync pull of ``local_dir`` from the worker, when the worker is
       a different node than the local one;
    2. an upload of ``local_dir`` to ``remote_dir`` with ``aws s3 sync``
       or ``gsutil rsync``, chosen by the ``remote_dir`` prefix.

    Args:
        force: If a previously started sync process is still running,
            kill it and proceed when True; otherwise print a warning and
            skip this sync.

    Returns:
        None. Problems (unknown worker ip, missing SSH setup, missing
        rsync, sync still in progress) are reported with ``print``.
    """
    self.last_sync_time = time.time()
    if not self.worker_ip:
        print("Worker ip unknown, skipping log sync for {}".format(
            self.local_dir))
        return

    worker_to_local_sync_cmd = None  # stays None when no rsync is needed
    if self.worker_ip != self.local_ip:
        ssh_key = get_ssh_key()
        ssh_user = get_ssh_user()
        if ssh_key is None or ssh_user is None:
            print("Error: log sync requires cluster to be setup with "
                  "`ray create_or_update`.")
            return
        if not distutils.spawn.find_executable("rsync"):
            print("Error: log sync requires rsync to be installed.")
            return
        # Quote each endpoint as one complete shell word. Splicing an
        # already-quoted path inside hand-written single quotes produced a
        # broken word whenever the path needed quoting, and left the user
        # and ip unquoted.
        source = "{}@{}:{}/".format(ssh_user, self.worker_ip, self.local_dir)
        target = "{}/".format(self.local_dir)
        # -s (--protect-args) keeps the remote shell from re-splitting the
        # remote path, so the quoting above also holds on the worker side.
        worker_to_local_sync_cmd = ((
            """rsync -savz -e "ssh -i {} -o ConnectTimeout=120s """
            """-o StrictHostKeyChecking=no" {} {}""").format(
                quote(ssh_key), quote(source), quote(target)))

    # Default to "no upload" so that a remote_dir with an unrecognised
    # prefix cannot leave this name unbound (UnboundLocalError below).
    local_to_remote_sync_cmd = None
    if self.remote_dir:
        if self.remote_dir.startswith(S3_PREFIX):
            local_to_remote_sync_cmd = ("aws s3 sync {} {}".format(
                quote(self.local_dir), quote(self.remote_dir)))
        elif self.remote_dir.startswith(GCS_PREFIX):
            local_to_remote_sync_cmd = ("gsutil rsync -r {} {}".format(
                quote(self.local_dir), quote(self.remote_dir)))

    if self.sync_process:
        # poll() refreshes returncode; None means it is still running.
        self.sync_process.poll()
        if self.sync_process.returncode is None:
            if force:
                self.sync_process.kill()
            else:
                print("Warning: last sync is still in progress, skipping")
                return

    if worker_to_local_sync_cmd or local_to_remote_sync_cmd:
        final_cmd = ""
        if worker_to_local_sync_cmd:
            final_cmd += worker_to_local_sync_cmd
        if local_to_remote_sync_cmd:
            if final_cmd:
                # Only upload if the pull from the worker succeeded.
                final_cmd += " && "
            final_cmd += local_to_remote_sync_cmd
        print("Running log sync: {}".format(final_cmd))
        self.sync_process = subprocess.Popen(final_cmd, shell=True)
def _remote_path(self):
    """Return the worker-side location as ``user@ip:dir/``, or None.

    None is returned when ``has_remote_target()`` is false or when no SSH
    user is available. In the latter case an error is logged, but only
    while the module-level ``_log_sync_warned`` flag is still unset; the
    flag is set afterwards.
    """
    global _log_sync_warned

    user = get_ssh_user()
    if not self.has_remote_target():
        return None

    if user is not None:
        return "{}@{}:{}/".format(user, self.worker_ip, self._remote_dir)

    # No SSH user: report the missing cluster setup unless already done.
    if not _log_sync_warned:
        logger.error("Syncer requires cluster to be setup with `ray up`.")
        _log_sync_warned = True
    return None
def _remote_path(self):
    """Build the ``user@ip:dir/`` string pointing at the worker's directory.

    Returns None if ``_check_valid_worker_ip()`` fails or if the SSH user
    is unknown. An unknown SSH user is logged as an error unless the
    module-level ``_log_sync_warned`` flag is already set; the flag is set
    once the error has been logged.
    """
    global _log_sync_warned

    ssh_user = get_ssh_user()
    worker_ip_ok = self._check_valid_worker_ip()
    if not worker_ip_ok:
        return None

    user_known = ssh_user is not None
    if not user_known:
        if _log_sync_warned:
            return None
        logger.error("Log sync requires cluster to be setup with "
                     "`ray up`.")
        _log_sync_warned = True
        return None

    remote = "{}@{}:{}/".format(ssh_user, self.worker_ip, self._remote_dir)
    return remote
def _remote_path(self) -> Optional[Union[str, Tuple[str, str]]]:
    """Describe where the remote directory lives on the worker.

    Returns:
        None when ``has_remote_target()`` is false or no SSH user is
        available; the tuple ``(worker_ip, remote_dir)`` when
        ``_pass_ip_path_tuples`` is truthy; otherwise the string
        ``user@ip:dir/``.

    A missing SSH user is logged as an error while the module-level
    ``_log_sync_warned`` flag is unset, and the flag is then set.
    """
    global _log_sync_warned

    user = get_ssh_user()
    if not self.has_remote_target():
        return None

    if user is None:
        if not _log_sync_warned:
            logger.error("Syncer requires cluster to be setup with `ray up`.")
            _log_sync_warned = True
        return None

    # The SSH user is required even in tuple mode, where it is not part of
    # the returned value.
    return (
        (self.worker_ip, self._remote_dir)
        if self._pass_ip_path_tuples
        else "{}@{}:{}/".format(user, self.worker_ip, self._remote_dir)
    )