def _subshell_eval_callback(self, proc_ctx, cmd_str): """Callback when subshell eval command exits""" self.ready = True if proc_ctx.ret_code == 0 and proc_ctx.out: self.remote_command_map[cmd_str] = proc_ctx.out.splitlines()[0] else: # Bad status LOG.error(proc_ctx) self.remote_command_map[cmd_str] = TaskRemoteMgmtError( TaskRemoteMgmtError.MSG_SELECT, (cmd_str, None), cmd_str, proc_ctx.ret_code, proc_ctx.out, proc_ctx.err)
def _remote_host_select_callback(self, proc_ctx, cmd_str):
    """Store the outcome of a finished host select command.

    Marks the manager as ready, then records under ``cmd_str`` in
    ``self.remote_host_str_map`` either the first line of the command's
    output (exit code 0 with non-empty output) or a
    ``TaskRemoteMgmtError`` describing the failure.

    Args:
        proc_ctx: Finished process context; ``ret_code``, ``out`` and
            ``err`` are read from it.
        cmd_str: The host select command string, used as the map key.
    """
    self.ready = True
    if proc_ctx.ret_code != 0 or not proc_ctx.out:
        # Bad status
        LOG.error(proc_ctx)
        failure = TaskRemoteMgmtError(
            TaskRemoteMgmtError.MSG_SELECT,
            (cmd_str, None),
            cmd_str,
            proc_ctx.ret_code,
            proc_ctx.out,
            proc_ctx.err,
        )
        self.remote_host_str_map[cmd_str] = failure
        return
    # Good status
    LOG.debug(proc_ctx)
    output_lines = proc_ctx.out.splitlines()
    self.remote_host_str_map[cmd_str] = output_lines[0]
def _remote_init_callback(self, proc_ctx, platform, tmphandle,
                          curve_auth, client_pub_key_dir):
    """Callback when "cylc remote-init" exits.

    Write public key for install target into client public key
    directory.
    Set remote_init_map status to REMOTE_INIT_DONE on success which
    in turn will trigger file installation to start.
    Set remote_init_map status to REMOTE_INIT_FAILED on error.

    Args:
        proc_ctx: Finished process context; ``ret_code``, ``out``,
            ``err`` and ``cmd`` are read from it.
        platform: Platform settings; only ``'install target'`` is read.
        tmphandle: Temporary file handle that fed the command; closed
            here.
        curve_auth: Authenticator whose ``configure_curve`` is called
            after a key has been written.
        client_pub_key_dir: Directory passed to ``configure_curve`` as
            the key location.
    """
    try:
        tmphandle.close()
    except OSError:  # E.g. ignore bad unlink, etc
        pass
    install_target = platform['install target']
    if proc_ctx.ret_code == 0:
        key_ok = True
        if "KEYSTART" in proc_ctx.out:
            regex_result = re.search(
                'KEYSTART((.|\n|\r)*)KEYEND', proc_ctx.out)
            if regex_result is None:
                # "KEYSTART" without a following "KEYEND": the key block
                # is malformed, so report this as a failed remote init
                # instead of raising AttributeError inside the callback.
                key_ok = False
            else:
                key = regex_result.group(1)
                workflow_srv_dir = get_workflow_srv_dir(self.workflow)
                public_key = KeyInfo(
                    KeyType.PUBLIC,
                    KeyOwner.CLIENT,
                    workflow_srv_dir=workflow_srv_dir,
                    install_target=install_target,
                )
                # Restrictive umask so the key file is created with
                # owner-only read/write permissions; always restore the
                # previous umask, even if the write fails.
                old_umask = os.umask(0o177)
                try:
                    with open(
                        public_key.full_key_path, 'w', encoding='utf8'
                    ) as text_file:
                        text_file.write(key)
                finally:
                    os.umask(old_umask)
                # configure_curve must be called every time certificates
                # are added or removed, in order to update the
                # Authenticator's state.
                curve_auth.configure_curve(
                    domain='*', location=(client_pub_key_dir))
        if key_ok:
            self.remote_init_map[install_target] = REMOTE_INIT_DONE
            self.ready = True
            return
    # Bad status
    LOG.error(
        TaskRemoteMgmtError(
            TaskRemoteMgmtError.MSG_INIT,
            install_target,
            ' '.join(quote(item) for item in proc_ctx.cmd),
            proc_ctx.ret_code,
            proc_ctx.out,
            proc_ctx.err,
        )
    )
    self.remote_init_map[install_target] = REMOTE_INIT_FAILED
    self.ready = True
def file_install(self, platform):
    """Start file installation on the platform's remote install target.

    Included by default in the file installation:
        Files:
            .service/server.key  (required for ZMQ authentication)
        Directories:
            app/
            bin/
            etc/
            lib/

    The install target is marked REMOTE_FILE_INSTALL_IN_PROGRESS, then
    an rsync-over-ssh command is queued on the process pool. If no host
    is available the target is marked REMOTE_FILE_INSTALL_FAILED
    instead and nothing is queued.

    Args:
        platform: Platform settings; ``'install target'`` and
            ``'hosts'`` are read here.
    """
    target = platform['install target']
    self.remote_init_map[target] = REMOTE_FILE_INSTALL_IN_PROGRESS
    source_dir = get_workflow_run_dir(self.workflow)
    dest_dir = get_remote_workflow_run_dir(self.workflow)
    try:
        rsync_cmd, rsync_host = construct_rsync_over_ssh_cmd(
            source_dir,
            dest_dir,
            platform,
            self.rsync_includes,
            bad_hosts=self.bad_hosts,
        )
        ctx = SubProcContext('file-install', rsync_cmd, rsync_host)
    except NoHostsError:
        LOG.error(
            TaskRemoteMgmtError(
                TaskRemoteMgmtError.MSG_INIT, target, '', '', '', ''))
        self.remote_init_map[target] = REMOTE_FILE_INSTALL_FAILED
        # Remove this platform's hosts from the bad-hosts set.
        self.bad_hosts -= set(platform['hosts'])
        self.ready = True
        return
    LOG.debug(f"Begin file installation on {target}")
    self.proc_pool.put_command(
        ctx,
        bad_hosts=self.bad_hosts,
        callback=self._file_install_callback,
        callback_args=[target, platform],
        callback_255=self._file_install_callback_255,
    )
def _remote_init_callback(self, proc_ctx, host, owner, tmphandle):
    """Handle completion of a "cylc remote-init" command.

    Closes the temporary file handle, then records a status for
    ``(host, owner)`` in ``self.remote_init_map``: the first of
    REMOTE_INIT_DONE / REMOTE_INIT_NOT_REQUIRED found in the command
    output when the exit code is 0, otherwise REMOTE_INIT_FAILED.

    Args:
        proc_ctx: Finished process context; ``ret_code``, ``out``,
            ``err`` and ``cmd`` are read from it.
        host: Host part of the map key.
        owner: Owner part of the map key.
        tmphandle: Temporary file handle to close.
    """
    self.ready = True
    try:
        tmphandle.close()
    except OSError:  # E.g. ignore bad unlink, etc
        pass
    remote_key = (host, owner)
    if proc_ctx.ret_code == 0:
        good_statuses = (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED)
        reported = [
            status for status in good_statuses if status in proc_ctx.out
        ]
        if reported:
            # Good status
            LOG.debug(proc_ctx)
            self.remote_init_map[remote_key] = reported[0]
            return
    # Bad status
    command_line = ' '.join(quote(item) for item in proc_ctx.cmd)
    LOG.error(
        TaskRemoteMgmtError(
            TaskRemoteMgmtError.MSG_INIT,
            remote_key,
            command_line,
            proc_ctx.ret_code,
            proc_ctx.out,
            proc_ctx.err,
        )
    )
    LOG.error(proc_ctx)
    self.remote_init_map[remote_key] = REMOTE_INIT_FAILED
def remote_tidy(self):
    """Remove suite contact files from initialised remotes.

    Call "cylc remote-tidy".
    This method is called on suite shutdown, so we want nothing to hang.
    Timeout any incomplete commands after 10 seconds.

    Also remove UUID file on suite host ".service/uuid".
    """
    # Remove UUID file
    uuid_fname = os.path.join(get_suite_srv_dir(self.suite), FILE_BASE_UUID)
    try:
        os.unlink(uuid_fname)
    except OSError:
        pass
    # Issue all SSH commands in parallel
    procs = {}
    for (host, owner), init_with_contact in self.remote_init_map.items():
        # Only remotes whose initialisation completed are tidied.
        if init_with_contact != REMOTE_INIT_DONE:
            continue
        cmd = ['timeout', '10', 'cylc', 'remote-tidy']
        if is_remote_host(host):
            cmd.append('--host=%s' % host)
        if is_remote_user(owner):
            cmd.append('--user=%s' % owner)
        if cylc.flow.flags.debug:
            cmd.append('--debug')
        cmd.append(get_remote_suite_run_dir(host, owner, self.suite))
        procs[(host, owner)] = (
            cmd,
            Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=DEVNULL))
    # Wait for commands to complete for a max of 10 seconds
    timeout = time() + 10.0
    while procs and time() < timeout:
        # Iterate over a copy: finished entries are deleted from procs.
        for (host, owner), (cmd, proc) in procs.copy().items():
            if proc.poll() is None:
                continue
            del procs[(host, owner)]
            out, err = (f.decode() for f in proc.communicate())
            if proc.wait():
                LOG.warning(
                    TaskRemoteMgmtError(
                        TaskRemoteMgmtError.MSG_TIDY,
                        (host, owner),
                        ' '.join(quote(item) for item in cmd),
                        proc.returncode, out, err))
    # Terminate any remaining commands
    for (host, owner), (cmd, proc) in procs.items():
        try:
            proc.terminate()
        except OSError:
            pass
        # The pipes were opened in binary mode, so decode the output
        # here exactly as the wait loop above does.
        out, err = (f.decode() for f in proc.communicate())
        if proc.wait():
            LOG.warning(
                TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY,
                    (host, owner),
                    ' '.join(quote(item) for item in cmd),
                    proc.returncode, out, err))
def _remote_init_callback(
        self, proc_ctx, platform, tmphandle,
        curve_auth, client_pub_key_dir):
    """Callback when "cylc remote-init" exits.

    On a zero exit code this:
      * runs an rsync-over-ssh file installation when the output
        contains REMOTE_INIT_DONE;
      * writes the client public key found between "KEYSTART" and
        "KEYEND" in the output (if present) and reconfigures
        ``curve_auth``;
      * records REMOTE_INIT_DONE or REMOTE_INIT_NOT_REQUIRED in
        ``self.remote_init_map`` for the install target.
    Any other outcome records REMOTE_INIT_FAILED.

    Args:
        proc_ctx: Finished process context; ``ret_code``, ``out``,
            ``err`` and ``cmd`` are read from it.
        platform: Platform settings; ``'install target'`` is read here.
        tmphandle: Temporary file handle that fed the command; closed
            here.
        curve_auth: Authenticator whose ``configure_curve`` is called
            after a key has been written.
        client_pub_key_dir: Directory passed to ``configure_curve`` as
            the key location.
    """
    self.ready = True
    try:
        tmphandle.close()
    except OSError:  # E.g. ignore bad unlink, etc
        pass
    install_target = platform['install target']
    # Kept as an instance attribute, as in the original implementation.
    self.install_target = install_target
    if proc_ctx.ret_code == 0:
        if REMOTE_INIT_DONE in proc_ctx.out:
            src_path = get_suite_run_dir(self.suite)
            dst_path = get_remote_suite_run_dir(platform, self.suite)
            process = None
            try:
                process = procopen(
                    construct_rsync_over_ssh_cmd(
                        src_path, dst_path, platform, self.rsync_includes),
                    stdoutpipe=True,
                    stderrpipe=True,
                    universal_newlines=True)
                out, err = process.communicate(timeout=600)
                if out:
                    RSYNC_LOG.info(
                        'File installation information for '
                        f'{install_target}:\n {out}')
                if err:
                    LOG.error(
                        'File installation error on '
                        f'{install_target}:\n {err}')
            except Exception as ex:
                # A timed-out communicate() does not stop the child, so
                # kill and reap it rather than leaving rsync running.
                # NOTE(review): assumes procopen returns a
                # subprocess.Popen-like object - confirm.
                if process is not None and process.poll() is None:
                    process.kill()
                    process.communicate()
                LOG.error(f"Problem during rsync: {ex}")
                self.remote_init_map[install_target] = (
                    REMOTE_INIT_FAILED)
                return
        key_ok = True
        if "KEYSTART" in proc_ctx.out:
            regex_result = re.search(
                'KEYSTART((.|\n|\r)*)KEYEND', proc_ctx.out)
            if regex_result is None:
                # "KEYSTART" without a following "KEYEND": the key block
                # is malformed, so report this as a failed remote init
                # instead of raising AttributeError inside the callback.
                key_ok = False
            else:
                key = regex_result.group(1)
                suite_srv_dir = get_suite_srv_dir(self.suite)
                public_key = KeyInfo(
                    KeyType.PUBLIC,
                    KeyOwner.CLIENT,
                    suite_srv_dir=suite_srv_dir,
                    install_target=install_target
                )
                # Restrictive umask so the key file is created with
                # owner-only read/write permissions; always restore the
                # previous umask, even if the write fails.
                old_umask = os.umask(0o177)
                try:
                    with open(
                            public_key.full_key_path,
                            'w', encoding='utf8') as text_file:
                        text_file.write(key)
                finally:
                    os.umask(old_umask)
                # configure_curve must be called every time certificates
                # are added or removed, in order to update the
                # Authenticator's state.
                curve_auth.configure_curve(
                    domain='*', location=(client_pub_key_dir))
        if key_ok:
            for status in (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED):
                if status in proc_ctx.out:
                    # Good status
                    LOG.debug(proc_ctx)
                    self.remote_init_map[install_target] = status
                    return
    # Bad status
    LOG.error(TaskRemoteMgmtError(
        TaskRemoteMgmtError.MSG_INIT,
        install_target, ' '.join(
            quote(item) for item in proc_ctx.cmd),
        proc_ctx.ret_code, proc_ctx.out, proc_ctx.err))
    LOG.error(proc_ctx)
    self.remote_init_map[install_target] = REMOTE_INIT_FAILED
def remote_tidy(self):
    """Remove workflow contact files and keys from initialised remotes.

    Call "cylc remote-tidy".
    This method is called on workflow shutdown, so we want nothing to
    hang. Timeout any incomplete commands after 10 seconds.

    A command that exits with code 255 has its host added to
    ``self.bad_hosts`` and is retried on another host of the same
    platform, with the 10 second deadline restarted.
    """
    from cylc.flow.platforms import PlatformLookupError

    def construct_remote_tidy_ssh_cmd(install_target, platform):
        """Return the (ssh command, host) pair for tidying a target."""
        cmd = ['remote-tidy']
        if cylc.flow.flags.verbosity > 1:
            cmd.append('--debug')
        cmd.append(install_target)
        cmd.append(get_remote_workflow_run_dir(self.workflow))
        host = get_host_from_platform(platform, bad_hosts=self.bad_hosts)
        cmd = construct_ssh_cmd(cmd, platform, host, timeout='10s')
        return cmd, host

    # Issue all SSH commands in parallel.
    # Each entry keeps the install target and platform it was started
    # for, so that a retry uses the right ones rather than whatever the
    # launch loop happened to finish on.
    procs = {}
    for install_target, message in self.remote_init_map.items():
        if message != REMOTE_FILE_INSTALL_DONE:
            continue
        if install_target == get_localhost_install_target():
            continue
        platform = get_random_platform_for_install_target(install_target)
        platform_n = platform['name']
        try:
            cmd, host = construct_remote_tidy_ssh_cmd(
                install_target, platform)
        except (NoHostsError, PlatformLookupError):
            LOG.warning(
                TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY,
                    platform_n, 1, '', '', 'remote tidy'))
        else:
            LOG.debug(
                "Removing authentication keys and contact file "
                f"from remote: \"{install_target}\"")
            procs[platform_n] = (
                cmd,
                host,
                Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=DEVNULL),
                install_target,
                platform,
            )
    # Wait for commands to complete for a max of 10 seconds
    timeout = time() + 10.0
    while procs and time() < timeout:
        # Iterate over a copy: entries are deleted/replaced in procs.
        for platform_n, entry in procs.copy().items():
            cmd, host, proc, install_target, platform = entry
            if proc.poll() is None:
                continue
            del procs[platform_n]
            out, err = (f.decode() for f in proc.communicate())
            # 255 error has to be handled here because remote tidy
            # doesn't use SubProcPool.
            if proc.returncode == 255:
                timeout = time() + 10.0
                self.bad_hosts.add(host)
                LOG.warning(
                    f'Tried to tidy remote platform: \'{platform_n}\' '
                    f'using host \'{host}\' but failed; '
                    'trying a different host')
                try:
                    retry_cmd, retry_host = construct_remote_tidy_ssh_cmd(
                        install_target, platform)
                except (NoHostsError, PlatformLookupError):
                    LOG.warning(
                        TaskRemoteMgmtError(
                            TaskRemoteMgmtError.MSG_TIDY,
                            platform_n, '', '', '', ''))
                else:
                    procs[platform_n] = (
                        retry_cmd,
                        retry_host,
                        Popen(
                            retry_cmd,
                            stdout=PIPE, stderr=PIPE, stdin=DEVNULL),
                        install_target,
                        platform,
                    )
            if proc.wait() and proc.returncode != 255:
                LOG.warning(
                    TaskRemoteMgmtError(
                        TaskRemoteMgmtError.MSG_TIDY,
                        platform_n,
                        ' '.join(quote(item) for item in cmd),
                        proc.returncode, out, err))
    # Terminate any remaining commands
    for platform_n, (cmd, _host, proc, _target, _platform) in procs.items():
        with suppress(OSError):
            proc.terminate()
        out, err = (f.decode() for f in proc.communicate())
        if proc.wait():
            LOG.warning(
                TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY,
                    platform_n,
                    ' '.join(quote(item) for item in cmd),
                    proc.returncode, out, err))
def remote_init(
    self,
    platform: Dict[str, Any],
    curve_auth: 'ThreadAuthenticator',
    client_pub_key_dir: str
) -> None:
    """Initialise a remote host if necessary.

    Call "cylc remote-init" to install workflow items to remote:
        ".service/contact": For TCP task communication
        "python/": if source exists

    For the localhost install target nothing is run and the target is
    marked REMOTE_FILE_INSTALL_DONE straight away. If no host is
    available the target is marked REMOTE_INIT_FAILED.

    Args:
        platform: A dict containing settings relating to platform used in
            this remote installation.
        curve_auth: The ZMQ authenticator.
        client_pub_key_dir: Client public key directory, used by the
            ZMQ authenticator.

    """
    install_target = platform['install target']
    if install_target == get_localhost_install_target():
        self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_DONE
        return
    # Set status of install target to in progress while waiting for remote
    # initialisation to finish
    self.remote_init_map[install_target] = REMOTE_INIT_IN_PROGRESS

    # Determine what items to install
    comms_meth: CommsMeth = CommsMeth(platform['communication method'])
    items = self._remote_init_items(comms_meth)

    # Create a TAR archive with the service files,
    # so they can be sent later via SSH's STDIN to the task remote.
    tmphandle = self.proc_pool.get_temporary_file()
    # The context manager closes the archive even if adding an item
    # fails part-way through.
    with tarfile.open(fileobj=tmphandle, mode='w') as tarhandle:
        for path, arcname in items:
            tarhandle.add(path, arcname=arcname)
    tmphandle.seek(0)
    # Build the remote-init command to be run over ssh
    cmd = ['remote-init']
    if cylc.flow.flags.verbosity > 1:
        cmd.append('--debug')
    cmd.append(str(install_target))
    cmd.append(get_remote_workflow_run_dir(self.workflow))
    dirs_to_symlink = get_dirs_to_symlink(install_target, self.workflow)
    for key, value in dirs_to_symlink.items():
        if value is not None:
            cmd.append(f"{key}={quote(value)} ")
    # Create the ssh command
    try:
        host = get_host_from_platform(platform, bad_hosts=self.bad_hosts)
    except NoHostsError:
        # No command is queued on this path, so no callback will ever
        # close the temporary archive: release it here.
        try:
            tmphandle.close()
        except OSError:  # E.g. ignore bad unlink, etc
            pass
        LOG.error(
            TaskRemoteMgmtError(
                TaskRemoteMgmtError.MSG_INIT,
                install_target,
                ' '.join(quote(item) for item in cmd),
                42, '', ''))
        self.remote_init_map[install_target] = REMOTE_INIT_FAILED
        # Remove this platform's hosts from the bad-hosts set.
        self.bad_hosts -= set(platform['hosts'])
        self.ready = True
    else:
        cmd = construct_ssh_cmd(cmd, platform, host)
        self.proc_pool.put_command(
            SubProcContext(
                'remote-init',
                cmd,
                stdin_files=[tmphandle],
                host=host),
            bad_hosts=self.bad_hosts,
            callback=self._remote_init_callback,
            callback_args=[
                platform, tmphandle, curve_auth, client_pub_key_dir
            ],
            callback_255=self._remote_init_callback_255,
            callback_255_args=[platform])