def _setup_job_logs_retrieval(self, itask, event): """Set up remote job logs retrieval. For a task with a job completion event, i.e. succeeded, failed, (execution) retry. """ id_key = ((self.HANDLER_JOB_LOGS_RETRIEVE, event), str(itask.point), itask.tdef.name, itask.submit_num) if itask.task_owner: user_at_host = itask.task_owner + "@" + itask.task_host else: user_at_host = itask.task_host events = (self.EVENT_FAILED, self.EVENT_RETRY, self.EVENT_SUCCEEDED) if (event not in events or user_at_host in [get_user() + '@localhost', 'localhost'] or not self.get_host_conf(itask, "retrieve job logs") or id_key in self.event_timers): return retry_delays = self.get_host_conf(itask, "retrieve job logs retry delays") if not retry_delays: retry_delays = [0] self.event_timers[id_key] = TaskActionTimer( TaskJobLogsRetrieveContext( self.HANDLER_JOB_LOGS_RETRIEVE, # key self.HANDLER_JOB_LOGS_RETRIEVE, # ctx_type user_at_host, self.get_host_conf(itask, "retrieve job logs max size"), ), retry_delays)
def __init__(
    self,
    workflow: str,
    broadcast_mgr: BroadcastMgr,
    data_store_mgr: DataStoreMgr,
    proc_pool: SubProcPool,
    user: Optional[str] = None,
    workflow_run_dir: Optional[str] = None,
    workflow_share_dir: Optional[str] = None,
):
    """Initialise empty trigger bookkeeping and the templating values.

    When ``user`` is not given, ``get_user()`` supplies it.
    """
    # Collaborating objects handed in by the caller.
    self.proc_pool = proc_pool
    self.broadcast_mgr = broadcast_mgr
    self.data_store_mgr = data_store_mgr
    self.workflow_run_dir = workflow_run_dir

    # Function and clock triggers, keyed by label.
    self.functx_map: Dict[str, SubFuncContext] = {}
    # Time of the next call of each function, keyed by signature.
    self.t_next_call: dict = {}
    # Results of satisfied triggers, keyed by signature.
    self.sat_xtrig: dict = {}
    # Signatures of functions still waiting on their callback.
    self.active: list = []

    # Values available for templating function arguments.
    self.farg_templ: Dict[str, Any] = {
        TMPL_WORKFLOW_NAME: workflow,
        TMPL_USER_NAME: user or get_user(),
        TMPL_WORKFLOW_RUN_DIR: workflow_run_dir,
        TMPL_WORKFLOW_SHARE_DIR: workflow_share_dir,
        TMPL_DEBUG_MODE: cylc.flow.flags.verbosity > 1
    }
def _is_local_auth_ok(reg, owner, host):
    """Return True if it is OK to use local passphrase file.

    Use values in ~/cylc-run/REG/.service/contact to make a judgement.

    Returns True straight away when ``is_remote(host, owner)`` is false.
    Otherwise the contact file is parsed as ``KEY=VALUE`` lines and the
    result is True only if its name, owner and host entries match the
    arguments (``owner`` / ``host`` default to ``get_user()`` /
    ``get_host()`` when None).  A missing, unreadable or malformed contact
    file gives False.
    """
    if not is_remote(host, owner):
        return True

    fname = os.path.join(get_suite_srv_dir(reg), SuiteFiles.Service.CONTACT)
    data = {}
    try:
        # Context manager so the handle is closed even on a parse error.
        with open(fname) as handle:
            for line in handle:
                # A line without "=" fails to unpack and raises ValueError.
                key, value = [item.strip() for item in line.split("=", 1)]
                data[key] = value
    except (IOError, ValueError):
        # No contact file, or it could not be parsed.
        return False

    # Contact file exists, check values match.
    if owner is None:
        owner = get_user()
    if host is None:
        host = get_host()
    host_value = data.get(ContactFileFields.HOST, "")
    return (
        reg == data.get(ContactFileFields.NAME)
        and owner == data.get(ContactFileFields.OWNER)
        and (
            host == host_value
            or host == host_value.split(".", 1)[0]  # no domain
        )
    )
def _setup_event_mail(self, itask, event):
    """Register a timer that sends the e-mail notification for an event."""
    # Non-unique events get a counter suffix so each occurrence has its
    # own key.
    if event in self.NON_UNIQUE_EVENTS:
        event_label = '%s-%d' % (
            event, itask.non_unique_events.get(event, 1))
    else:
        event_label = event
    timer_key = (
        (self.HANDLER_MAIL, event_label),
        str(itask.point),
        itask.tdef.name,
        itask.submit_num,
    )

    if timer_key in self.event_timers:
        return
    if event not in self._get_events_conf(itask, "mail events", []):
        return

    # Fall back to a single immediate attempt when no delays are set.
    delays = self._get_events_conf(itask, "mail retry delays") or [0]
    mail_from = self._get_events_conf(
        itask, "mail from", "notifications@" + get_host())
    mail_to = self._get_events_conf(itask, "mail to", get_user())
    mail_smtp = self._get_events_conf(itask, "mail smtp")
    self.event_timers[timer_key] = TaskActionTimer(
        TaskEventMailContext(
            self.HANDLER_MAIL,  # key
            self.HANDLER_MAIL,  # ctx_type
            mail_from,
            mail_to,
            mail_smtp,
        ),
        delays)
def get_suite_srv_dir(self, reg, suite_owner=None):
    """Return the path of the service directory of suite ``reg``.

    $CYLC_SUITE_RUN_DIR is used as the run directory only when the
    environment also names this suite and this owner; otherwise the run
    directory comes from ``get_suite_run_dir(reg)``.
    """
    owner = suite_owner or get_user()
    env_run_dir = os.getenv("CYLC_SUITE_RUN_DIR")
    env_is_this_suite = (
        env_run_dir
        and os.getenv("CYLC_SUITE_NAME") == reg
        and os.getenv("CYLC_SUITE_OWNER") == owner
    )
    base_dir = env_run_dir if env_is_this_suite else get_suite_run_dir(reg)
    return os.path.join(base_dir, self.DIR_BASE_SRV)
def get_suite_srv_dir(self, reg, suite_owner=None):
    """Return the path of the service directory of suite ``reg``.

    $CYLC_SUITE_RUN_DIR is trusted only when $CYLC_SUITE_NAME and
    $CYLC_SUITE_OWNER match the requested suite and owner; in every other
    case the run directory is derived from the global configuration.
    """
    owner = suite_owner if suite_owner else get_user()
    candidate = os.getenv("CYLC_SUITE_RUN_DIR")
    if (
        candidate
        and os.getenv("CYLC_SUITE_NAME") == reg
        and os.getenv("CYLC_SUITE_OWNER") == owner
    ):
        return os.path.join(candidate, self.DIR_BASE_SRV)
    derived = glbl_cfg().get_derived_host_item(reg, 'suite run directory')
    return os.path.join(derived, self.DIR_BASE_SRV)
def get_workflow_srv_dir(reg, workflow_owner=None):
    """Return the path of the service directory of workflow ``reg``.

    $CYLC_WORKFLOW_RUN_DIR is used as the run directory only when the
    environment also names this workflow and this owner; otherwise the run
    directory comes from ``get_workflow_run_dir(reg)``.
    """
    owner = workflow_owner or get_user()
    env_run_dir = os.getenv("CYLC_WORKFLOW_RUN_DIR")
    env_is_this_workflow = (
        env_run_dir
        and os.getenv("CYLC_WORKFLOW_NAME") == reg
        and os.getenv("CYLC_WORKFLOW_OWNER") == owner
    )
    if env_is_this_workflow:
        base_dir = env_run_dir
    else:
        base_dir = get_workflow_run_dir(reg)
    return os.path.join(base_dir, WorkflowFiles.Service.DIRNAME)
def get_suite_srv_dir(reg, suite_owner=None):
    """Return the path of the service directory of suite ``reg``.

    $CYLC_SUITE_RUN_DIR is used as the run directory only when the
    environment also names this suite and this owner; otherwise the run
    directory comes from ``get_workflow_run_dir(reg)``.
    """
    owner = suite_owner or get_user()
    env_run_dir = os.getenv("CYLC_SUITE_RUN_DIR")
    env_is_this_suite = (
        env_run_dir
        and os.getenv("CYLC_SUITE_NAME") == reg
        and os.getenv("CYLC_SUITE_OWNER") == owner
    )
    if env_is_this_suite:
        base_dir = env_run_dir
    else:
        base_dir = get_workflow_run_dir(reg)
    return os.path.join(base_dir, SuiteFiles.Service.DIRNAME)
def __init__(
    self,
    suite: str,
    user: str = None,
    *,  # following must be keyword args
    broadcast_mgr: BroadcastMgr = None,
    proc_pool: SubProcPool = None,
    suite_run_dir: str = None,
    suite_share_dir: str = None,
    suite_work_dir: str = None,
    suite_source_dir: str = None,
):
    """Initialize the xtrigger manager.

    Args:
        suite (str): suite name
        user (str): suite owner; ``get_user()`` is used when not given
        broadcast_mgr (BroadcastMgr): the Broadcast Manager
        proc_pool (SubProcPool): pool of Subprocesses
        suite_run_dir (str): suite run directory
        suite_share_dir (str): suite share directory
        suite_work_dir (str): accepted but not used by this constructor
        suite_source_dir (str): suite source directory
    """
    # Collaborating objects and locations handed in by the caller.
    self.proc_pool = proc_pool
    self.broadcast_mgr = broadcast_mgr
    self.suite_source_dir = suite_source_dir

    # Function and clock triggers, keyed by label.
    self.functx_map = {}
    self.clockx_map = {}
    # Time of the next call of each function, keyed by signature.
    self.t_next_call = {}
    # Satisfied function triggers (with results) and clock triggers.
    self.sat_xtrig = {}
    self.sat_xclock = []
    # Signatures of functions still waiting on their callback.
    self.active = []
    # Every trigger and clock signature in the current task pool.
    self.all_xtrig = []
    self.all_xclock = []
    self.pflag = False

    # Values available for templating function arguments.
    self.farg_templ = {
        TMPL_SUITE_NAME: suite,
        TMPL_USER_NAME: user or get_user(),
        TMPL_SUITE_RUN_DIR: suite_run_dir,
        TMPL_SUITE_SHARE_DIR: suite_share_dir,
        TMPL_DEBUG_MODE: cylc.flow.flags.debug
    }
def _validate_workflow_ids(*tokens_list, src_path):
    """Check each tokens mapping refers to a usable workflow of this user.

    Raises:
        UserInputError: if a workflow belongs to another user, or if the
            path checked for a workflow is a file.
    """
    for index, tokens in enumerate(tokens_list):
        owner = tokens['user']
        if owner and owner != get_user():
            raise UserInputError(
                "Operating on other users' workflows is not supported")

        # NOTE(review): src_path is rebound below, so this name check can
        # only run while src_path is still falsy - confirm that skipping
        # it for later IDs is intended.
        if not src_path:
            validate_workflow_name(tokens['workflow'])

        # Only the first ID may use a caller-supplied source path; every
        # other case resolves the workflow's run directory.
        if index != 0 or not src_path:
            src_path = Path(get_workflow_run_dir(tokens['workflow']))

        if src_path.is_file():
            raise UserInputError(
                f'Workflow ID cannot be a file: {tokens["workflow"]}')
        detect_both_flow_and_suite(src_path)
def _run_event_mail(self, config, ctx):
    """Helper for "run_event_handlers", do mail notification.

    Does nothing unless ``ctx.event`` is one of the configured
    'mail events'.
    """
    if ctx.event not in self.get_events_conf(config, 'mail events', []):
        return

    # SMTP server is passed to the mail command through its environment.
    env = dict(os.environ)
    mail_smtp = self.get_events_conf(config, 'mail smtp')
    if mail_smtp:
        env['smtp'] = mail_smtp

    subject = '[suite %(event)s] %(suite)s' % {
        'suite': ctx.suite, 'event': ctx.event}

    # Message body: one "name: value" line per non-empty field.
    fields = [
        ('suite event', ctx.event),
        ('reason', ctx.reason),
        ('suite', ctx.suite),
        ('host', ctx.host),
        ('port', ctx.port),
        ('owner', ctx.owner),
    ]
    stdin_str = ''.join(
        '%s: %s\n' % (name, value) for name, value in fields if value)

    mail_footer_tmpl = self.get_events_conf(config, 'mail footer')
    if mail_footer_tmpl:
        stdin_str += (mail_footer_tmpl + '\n') % {
            'host': ctx.host,
            'port': ctx.port,
            'owner': ctx.owner,
            'suite': ctx.suite}

    mail_from = self.get_events_conf(
        config, 'mail from', 'notifications@' + get_host())
    mail_to = self.get_events_conf(config, 'mail to', get_user())
    proc_ctx = SubProcContext(
        (self.SUITE_EVENT_HANDLER, ctx.event),
        ['mail', '-s', subject, '-r', mail_from, mail_to],
        env=env,
        stdin_str=stdin_str)

    if not self.proc_pool.closed:
        # Run command using process pool.
        self.proc_pool.put_command(
            proc_ctx, self._run_event_mail_callback)
        return
    # Process pool is closed: run command in foreground.
    self.proc_pool.run_command(proc_ctx)
    self._run_event_handlers_callback(proc_ctx)
def _send_mail(self, event, subject, message, schd, env):
    """Run the ``mail`` command for a workflow event.

    ``message`` is passed to the command on stdin.  The sender defaults to
    ``notifications@<get_host()>`` and the recipient to ``get_user()`` when
    the 'from' / 'to' settings are not configured.
    """
    sender = self.get_events_conf(
        schd.config, 'from', 'notifications@' + get_host())
    recipient = self.get_events_conf(schd.config, 'to', get_user())
    proc_ctx = SubProcContext(
        (self.WORKFLOW_EVENT_HANDLER, event),
        ['mail', '-s', subject, '-r', sender, recipient],
        env=env,
        stdin_str=message)

    if not self.proc_pool.closed:
        # Run command using process pool.
        self.proc_pool.put_command(
            proc_ctx, callback=self._run_event_mail_callback)
        return
    # Process pool is closed: run command in foreground.
    self.proc_pool.run_command(proc_ctx)
    self._run_event_handlers_callback(proc_ctx)
def __init__(
    self,
    workflow: str,
    broadcast_mgr: BroadcastMgr,
    data_store_mgr: DataStoreMgr,
    proc_pool: SubProcPool,
    user: Optional[str] = None,
    workflow_run_dir: Optional[str] = None,
    workflow_share_dir: Optional[str] = None,
):
    """Initialise empty trigger bookkeeping and the templating values.

    When ``user`` is not given, ``get_user()`` supplies it.  The deprecated
    "suite" template variables are kept as aliases of the corresponding
    workflow values.
    """
    # Workflow function and clock triggers by label.
    self.functx_map: Dict[str, SubFuncContext] = {}
    # When next to call a function, by signature.
    self.t_next_call: dict = {}
    # Satisfied triggers and their function results, by signature.
    self.sat_xtrig: dict = {}
    # Signatures of active functions (waiting on callback).
    self.active: list = []

    self.workflow_run_dir = workflow_run_dir

    # For function arg templating.
    if not user:
        user = get_user()
    self.farg_templ: Dict[str, Any] = {
        TemplateVariables.Workflow.value: workflow,
        TemplateVariables.UserName.value: user,
        TemplateVariables.RunDir.value: workflow_run_dir,
        TemplateVariables.ShareDir.value: workflow_share_dir,
        TemplateVariables.DebugMode.value: cylc.flow.flags.verbosity > 1,
        # deprecated
        TemplateVariables.WorkflowName.value: workflow,
        TemplateVariables.SuiteName.value: workflow,
        # The directory aliases must carry the directories, not the
        # workflow name.
        TemplateVariables.SuiteRunDir.value: workflow_run_dir,
        TemplateVariables.SuiteShareDir.value: workflow_share_dir,
    }

    self.proc_pool = proc_pool
    self.broadcast_mgr = broadcast_mgr
    self.data_store_mgr = data_store_mgr
def __init__(
    self,
    suite: str,
    broadcast_mgr: BroadcastMgr,
    data_store_mgr: DataStoreMgr,
    proc_pool: SubProcPool,
    user: Optional[str] = None,
    suite_run_dir: Optional[str] = None,
    suite_share_dir: Optional[str] = None,
):
    """Initialise empty trigger bookkeeping and the templating values.

    When ``user`` is not given, ``get_user()`` supplies it.
    """
    # Collaborating objects and locations handed in by the caller.
    self.proc_pool = proc_pool
    self.broadcast_mgr = broadcast_mgr
    self.data_store_mgr = data_store_mgr
    self.suite_run_dir = suite_run_dir

    # Function and clock triggers, keyed by label.
    self.functx_map: Dict[str, SubFuncContext] = {}
    # Time of the next call of each function, keyed by signature.
    self.t_next_call: dict = {}
    # Results of satisfied triggers, keyed by signature.
    self.sat_xtrig: dict = {}
    # Signatures of functions still waiting on their callback.
    self.active: list = []
    # Every trigger and clock signature in the current task pool.
    self.all_xtrig: list = []
    self.pflag = False

    # Values available for templating function arguments.
    self.farg_templ: Dict[str, Any] = {
        TMPL_SUITE_NAME: suite,
        TMPL_USER_NAME: user or get_user(),
        TMPL_SUITE_RUN_DIR: suite_run_dir,
        TMPL_SUITE_SHARE_DIR: suite_share_dir,
        TMPL_DEBUG_MODE: cylc.flow.flags.debug
    }
def cache_passphrase(self, reg, owner, host, value):
    """Cache and dump passphrase for a remote suite in standard location.

    The passphrase is always stored in the in-memory cache.  It is also
    written under the directory given by ``self._get_cache_dir`` when
    disk caching has been enabled for this (reg, owner, host).  This is
    normally called on a successful authentication.
    """
    owner = get_user() if owner is None else owner
    host = get_host() if host is None else host
    ident = (reg, owner, host)
    cache_dir = self._get_cache_dir(reg, owner, host)
    self.cache[self.FILE_BASE_PASSPHRASE][ident] = value

    # Dump to a file only for remote suites loaded via SSH.
    if not self.can_disk_cache_passphrases.get(ident):
        return
    try:
        self._dump_item(cache_dir, self.FILE_BASE_PASSPHRASE, value)
    except (IOError, OSError):
        # Not desirable, but failing to write the file is not disastrous.
        if cylc.flow.flags.debug:
            import traceback
            traceback.print_exc()
def __init__(
    self,
    suite: str,
    user: str = None,
    *,  # following must be keyword args
    broadcast_mgr: BroadcastMgr = None,
    proc_pool: SubProcPool = None,
    suite_run_dir: str = None,
    suite_share_dir: str = None,
    suite_source_dir: str = None,
):
    """Initialise empty trigger bookkeeping and the templating values.

    When ``user`` is not given, ``get_user()`` supplies it.
    """
    # Collaborating objects and locations handed in by the caller.
    self.proc_pool = proc_pool
    self.broadcast_mgr = broadcast_mgr
    self.suite_source_dir = suite_source_dir

    # Function and clock triggers, keyed by label.
    self.functx_map = {}
    # Time of the next call of each function, keyed by signature.
    self.t_next_call = {}
    # Results of satisfied triggers, keyed by signature.
    self.sat_xtrig = {}
    # Signatures of functions still waiting on their callback.
    self.active = []
    # Every trigger and clock signature in the current task pool.
    self.all_xtrig = []
    self.pflag = False

    # Values available for templating function arguments.
    self.farg_templ = {
        TMPL_SUITE_NAME: suite,
        TMPL_USER_NAME: user or get_user(),
        TMPL_SUITE_RUN_DIR: suite_run_dir,
        TMPL_SUITE_SHARE_DIR: suite_share_dir,
        TMPL_DEBUG_MODE: cylc.flow.flags.debug
    }
def _is_local_auth_ok(self, reg, owner, host): """Return True if it is OK to use local passphrase file. Use values in ~/cylc-run/REG/.service/contact to make a judgement. Cache results in self.can_use_load_auths. """ if (reg, owner, host) not in self.can_use_load_auths: if is_remote(host, owner): fname = os.path.join( self.get_suite_srv_dir(reg), self.FILE_BASE_CONTACT) data = {} try: for line in open(fname): key, value = ( [item.strip() for item in line.split("=", 1)]) data[key] = value except (IOError, ValueError): # No contact file self.can_use_load_auths[(reg, owner, host)] = False else: # Contact file exists, check values match if owner is None: owner = get_user() if host is None: host = get_host() host_value = data.get(self.KEY_HOST, "") self.can_use_load_auths[(reg, owner, host)] = ( reg == data.get(self.KEY_NAME) and owner == data.get(self.KEY_OWNER) and ( host == host_value or host == host_value.split(".", 1)[0] # no domain ) ) else: self.can_use_load_auths[(reg, owner, host)] = True return self.can_use_load_auths[(reg, owner, host)]
def _load_remote_item(self, item, reg, owner, host):
    """Load content of service item from remote [owner@]host via SSH.

    Returns the content, or None when the target is not remote, the SSH
    command cannot be started, the command fails, or nothing follows the
    marker line in its output.
    """
    if not is_remote(host, owner):
        return
    host = 'localhost' if host is None else host
    owner = get_user() if owner is None else owner

    if item == self.FILE_BASE_CONTACT and not is_remote_host(host):
        # Attempt to read suite contact file via the local filesystem.
        local_srv_dir = r'%(run_d)s/%(srv_base)s' % {
            'run_d': get_remote_suite_run_dir('localhost', owner, reg),
            'srv_base': self.DIR_BASE_SRV,
        }
        found = self._load_local_item(item, local_srv_dir)
        if found is not None:
            return found
        # Else drop through and attempt via ssh to the suite account.

    # Marker echoed before the file so unrelated output can be skipped.
    marker = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
    # Attempt to cat the item under the suite service directory.
    script = (
        r"""echo '%(prefix)s'; """
        r'''cat "%(run_d)s/%(srv_base)s/%(item)s"'''
    ) % {
        'prefix': marker,
        'run_d': get_remote_suite_run_dir(host, owner, reg),
        'srv_base': self.DIR_BASE_SRV,
        'item': item
    }
    import shlex
    command = shlex.split(
        glbl_cfg().get_host_item('ssh command', host, owner))
    command += ['-n', owner + '@' + host, script]
    from subprocess import Popen, PIPE, DEVNULL  # nosec
    try:
        proc = Popen(
            command, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)  # nosec
    except OSError:
        if cylc.flow.flags.debug:
            import traceback
            traceback.print_exc()
        return
    out, err = (f.decode() for f in proc.communicate())
    ret_code = proc.wait()

    # The content is everything after the first line equal to the marker.
    out_lines = out.splitlines(True)
    content = ""
    for index, line in enumerate(out_lines):
        if line.strip() == marker:
            content = "".join(out_lines[index + 1:])
            break

    if ret_code or not content:
        LOG.debug(
            '$ %(command)s  # code=%(ret_code)s\n%(err)s',
            {
                'command': command,
                # STDOUT may contain passphrase, so not safe to print
                # 'out': out,
                'err': err,
                'ret_code': ret_code,
            })
        return
    return content
def get_auth_item(self, item, reg, owner=None, host=None, content=False):
    """Locate/load passphrase, SSL private key, SSL certificate, etc.

    Return file name, or content of file if content=True is set.
    Files are searched from these locations in order:

    1/ For running task jobs, service directory under:
       a/ $CYLC_SUITE_RUN_DIR for remote jobs.
       b/ $CYLC_SUITE_RUN_DIR_ON_SUITE_HOST for local jobs or remote jobs
          with SSH messaging.

    2/ (Passphrases only) From memory cache, for remote suite passphrases.
       Don't use if content=False.

    3/ For suite on local user@host. The suite service directory.

    4/ Location under $HOME/.cylc/ for remote suite control from accounts
       that do not actually need the suite definition directory to be
       installed:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

    5/ For remote suites, try locating the file from the suite service
       directory on remote owner@host via SSH. If content=False, the value
       of the located file will be dumped under:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

    Raises:
        ValueError: if ``item`` is not a recognised service file name.
        SuiteServiceFileError: if the item is not found anywhere.
    """
    known_items = [
        self.FILE_BASE_PASSPHRASE,
        self.FILE_BASE_CONTACT,
        self.FILE_BASE_CONTACT2,
    ]
    if item not in known_items:
        raise ValueError("%s: item not recognised" % item)
    ident = (reg, owner, host)
    if item == self.FILE_BASE_PASSPHRASE:
        self.can_disk_cache_passphrases[ident] = False

    # Directory lookups either load the file content or just locate it.
    fetch = self._load_local_item if content else self._locate_item

    # 1/ Service directory named by the task job environment.
    if reg == os.getenv('CYLC_SUITE_NAME'):
        if 'CYLC_SUITE_RUN_DIR' in os.environ:
            # 1(a)/ Task messaging call.
            env_keys = ['CYLC_SUITE_RUN_DIR']
        elif self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST in os.environ:
            # 1(b)/ Task messaging call via ssh messaging.
            env_keys = [self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST]
        else:
            env_keys = []
        for env_key in env_keys:
            found = fetch(
                item, os.path.join(os.environ[env_key], self.DIR_BASE_SRV))
            if found:
                return found

    # 2/ From memory cache
    if item in self.cache:
        cache_owner = get_user() if owner is None else owner
        cache_host = get_host() if host is None else host
        try:
            return self.cache[item][(reg, cache_owner, cache_host)]
        except KeyError:
            pass

    # 3/ Local suite service directory
    if self._is_local_auth_ok(reg, owner, host):
        found = fetch(item, self.get_suite_srv_dir(reg))
        if found:
            return found

    # 4/ Disk cache for remote suites
    if owner is not None and host is not None:
        cache_dirs = [self._get_cache_dir(reg, owner, host)]
        short_host = host.split('.', 1)[0]
        if short_host != host:
            cache_dirs.append(self._get_cache_dir(reg, owner, short_host))
        for cache_dir in cache_dirs:
            found = fetch(item, cache_dir)
            if found:
                return found

    # 5/ Use SSH to load content from remote owner@host
    # Note: It is not possible to find ".service/contact2" on the suite
    # host, because it is installed on task host by "cylc remote-init" on
    # demand.
    if item != self.FILE_BASE_CONTACT2:
        found = self._load_remote_item(item, reg, owner, host)
        if found:
            if item == self.FILE_BASE_PASSPHRASE:
                self.can_disk_cache_passphrases[ident] = True
            if not content:
                # Caller wants a file name: dump the content and return
                # the path of the dumped file.
                cache_dir = self._get_cache_dir(reg, owner, host)
                self._dump_item(cache_dir, item, found)
                found = os.path.join(cache_dir, item)
            return found

    raise SuiteServiceFileError("Couldn't get %s" % item)
def get_auth_item(self, item, reg, owner=None, host=None, content=False):
    """Locate/load passphrase, SSL private key, SSL certificate, etc.

    Return file name, or content of file if content=True is set.
    Files are searched from these locations in order:

    1/ For running task jobs, service directory under:
       a/ $CYLC_SUITE_RUN_DIR for remote jobs.
       b/ $CYLC_SUITE_RUN_DIR_ON_SUITE_HOST for local jobs or remote jobs
          with SSH messaging.

    2/ (Passphrases only) From memory cache, for remote suite passphrases.
       Don't use if content=False.

    3/ For suite on local user@host. The suite service directory.

    4/ Location under $HOME/.cylc/ for remote suite control from accounts
       that do not actually need the suite definition directory to be
       installed:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

    5/ For remote suites, try locating the file from the suite service
       directory on remote owner@host via SSH. If content=False, the value
       of the located file will be dumped under:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

    Raises:
        ValueError: if ``item`` is not a recognised service file name.
        SuiteServiceFileError: if every location fails to yield the item.
    """
    if item not in [
        self.FILE_BASE_PASSPHRASE,
        self.FILE_BASE_CONTACT,
        self.FILE_BASE_CONTACT2,
    ]:
        raise ValueError("%s: item not recognised" % item)
    is_passphrase = item == self.FILE_BASE_PASSPHRASE
    if is_passphrase:
        self.can_disk_cache_passphrases[(reg, owner, host)] = False

    def lookup(directory):
        """Load the item's content from, or locate it in, a directory."""
        if content:
            return self._load_local_item(item, directory)
        return self._locate_item(item, directory)

    # 1/ Service directory named by the task job environment.
    if reg == os.getenv('CYLC_SUITE_NAME'):
        run_dir_keys = []
        if 'CYLC_SUITE_RUN_DIR' in os.environ:
            # 1(a)/ Task messaging call.
            run_dir_keys.append('CYLC_SUITE_RUN_DIR')
        elif self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST in os.environ:
            # 1(b)/ Task messaging call via ssh messaging.
            run_dir_keys.append(self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST)
        for run_dir_key in run_dir_keys:
            result = lookup(
                os.path.join(os.environ[run_dir_key], self.DIR_BASE_SRV))
            if result:
                return result

    # 2/ From memory cache
    if item in self.cache:
        effective_owner = owner if owner is not None else get_user()
        effective_host = host if host is not None else get_host()
        try:
            return self.cache[item][(reg, effective_owner, effective_host)]
        except KeyError:
            pass

    # 3/ Local suite service directory
    if self._is_local_auth_ok(reg, owner, host):
        result = lookup(self.get_suite_srv_dir(reg))
        if result:
            return result

    # 4/ Disk cache for remote suites
    if owner is not None and host is not None:
        candidates = [self._get_cache_dir(reg, owner, host)]
        host_without_domain = host.split('.', 1)[0]
        if host_without_domain != host:
            candidates.append(
                self._get_cache_dir(reg, owner, host_without_domain))
        for candidate in candidates:
            result = lookup(candidate)
            if result:
                return result

    # 5/ Use SSH to load content from remote owner@host
    # Note: It is not possible to find ".service/contact2" on the suite
    # host, because it is installed on task host by "cylc remote-init" on
    # demand.
    if item != self.FILE_BASE_CONTACT2:
        result = self._load_remote_item(item, reg, owner, host)
        if result:
            if is_passphrase:
                self.can_disk_cache_passphrases[(reg, owner, host)] = True
            if content:
                return result
            # Caller wants a file name: dump the content and return the
            # path of the dumped file.
            dump_dir = self._get_cache_dir(reg, owner, host)
            self._dump_item(dump_dir, item, result)
            return os.path.join(dump_dir, item)

    raise SuiteServiceFileError("Couldn't get %s" % item)
def test_get_user():
    """get_user() should agree with the $USER environment variable."""
    expected = os.getenv('USER')
    actual = get_user()
    assert expected == actual
def _run_event_mail(self, config, ctx):
    """Helper for "run_event_handlers", do mail notification.

    Does nothing unless ``ctx.event`` is one of the configured
    'mail events'.  The message body lists the non-empty context fields,
    followed by the configured footer template if it can be rendered.  A
    footer template that cannot be rendered is logged and ignored so that
    the notification is still sent.
    """
    if ctx.event not in self.get_events_conf(config, 'mail events', []):
        return

    # SMTP server
    env = dict(os.environ)
    mail_smtp = self.get_events_conf(config, 'smtp')
    if mail_smtp:
        env['smtp'] = mail_smtp

    subject = '[workflow %(event)s] %(workflow)s' % {
        'workflow': ctx.workflow,
        'event': ctx.event
    }

    stdin_str = ''
    for name, value in [
        ('workflow event', ctx.event),
        ('reason', ctx.reason),
        ('workflow', ctx.workflow),
        ('host', ctx.host),
        ('port', ctx.port),
        ('owner', ctx.owner),
    ]:
        if value:
            stdin_str += '%s: %s\n' % (name, value)

    mail_footer_tmpl = self.get_events_conf(config, 'footer')
    if mail_footer_tmpl:
        # BACK COMPAT: "suite" deprecated
        # url:
        #     https://github.com/cylc/cylc-flow/pull/4174
        # from:
        #     Cylc 8
        # remove at:
        #     Cylc 9
        try:
            stdin_str_footer = (mail_footer_tmpl + '\n') % {
                'host': ctx.host,
                'port': ctx.port,
                'owner': ctx.owner,
                'suite': ctx.workflow,  # deprecated
                'workflow': ctx.workflow
            }
        except (KeyError, ValueError, TypeError):
            # %-formatting raises KeyError for an unknown name, and
            # ValueError / TypeError for a malformed or positional
            # conversion; all of them mean the template is unusable.
            LOG.warning(
                "Ignoring bad mail footer template: %s", mail_footer_tmpl)
        else:
            stdin_str += stdin_str_footer

    proc_ctx = SubProcContext(
        (self.WORKFLOW_EVENT_HANDLER, ctx.event),
        [
            'mail',
            '-s', subject,
            '-r', self.get_events_conf(
                config, 'from', 'notifications@' + get_host()),
            self.get_events_conf(config, 'to', get_user()),
        ],
        env=env,
        stdin_str=stdin_str)
    if self.proc_pool.closed:
        # Run command in foreground if process pool is closed
        self.proc_pool.run_command(proc_ctx)
        self._run_event_handlers_callback(proc_ctx)
    else:
        # Run command using process pool otherwise
        self.proc_pool.put_command(
            proc_ctx, callback=self._run_event_mail_callback)
def main(parser, options, suite, *task_ids):
    """cylc submit CLI.

    No TASK EVENT HOOKS are set for the submit command because there is
    no scheduler instance watching for task failure etc.

    Note: a suite contact env file is not written by this command (it
    would overwrite the real one if the suite is running).

    Prepares (with options.dry_run) or submits a job for every task
    matching task_ids, then exits the process with status 1 if a job file
    could not be prepared or a job failed to submit, and 0 otherwise.

    Raises:
        UserInputError: for an invalid task ID, or an ID matching no task.
    """
    # Keep the output quiet unless verbose or debug was requested.
    if not options.verbose and not options.debug:
        LOG.setLevel(WARNING)
    # Validate every ID before doing any work.
    for task_id in task_ids:
        if not TaskID.is_valid_id(task_id):
            raise UserInputError("Invalid task ID %s" % task_id)
    suiterc = get_suite_rc(suite)
    suite_dir = os.path.dirname(suiterc)
    # For user-defined batch system handlers
    sys.path.append(os.path.join(suite_dir, 'python'))

    # Load suite config and tasks
    config = SuiteConfig(
        suite, suiterc, options,
        load_template_vars(options.templatevars, options.templatevars_file))
    itasks = []
    for task_id in task_ids:
        name_str, point_str = TaskID.split(task_id)
        taskdefs = config.find_taskdefs(name_str)
        if not taskdefs:
            raise UserInputError("No task found for %s" % task_id)
        # One task proxy per matching definition, at the requested point.
        for taskdef in taskdefs:
            itasks.append(TaskProxy(
                taskdef, get_point(point_str).standardise(), is_startup=True))

    # Initialise job submit environment
    make_suite_run_tree(suite)
    # Extract job.sh from library, for use in job scripts.
    extract_resources(get_suite_srv_dir(suite), ['etc/job.sh'])
    pool = SubProcPool()
    owner = get_user()
    job_pool = JobPool(suite, owner)
    db_mgr = SuiteDatabaseManager()
    task_job_mgr = TaskJobManager(
        suite, pool, db_mgr,
        TaskEventsManager(suite, pool, db_mgr, BroadcastMgr(db_mgr), job_pool),
        job_pool)
    task_job_mgr.task_remote_mgr.single_task_mode = True
    # Suite-level variables handed to the job file writer.
    task_job_mgr.job_file_writer.set_suite_env({
        'CYLC_UTC': str(config.cfg['cylc']['UTC mode']),
        'CYLC_DEBUG': str(cylc.flow.flags.debug).lower(),
        'CYLC_VERBOSE': str(cylc.flow.flags.verbose).lower(),
        'CYLC_SUITE_NAME': suite,
        'CYLC_CYCLING_MODE': str(config.cfg['scheduling']['cycling mode']),
        'CYLC_SUITE_INITIAL_CYCLE_POINT': str(
            config.cfg['scheduling']['initial cycle point']),
        'CYLC_SUITE_FINAL_CYCLE_POINT': str(
            config.cfg['scheduling']['final cycle point']),
    })

    ret_code = 0
    # Tasks leave this list once the job manager has handled them.
    waiting_tasks = list(itasks)
    if options.dry_run:
        # Dry run: only prepare the job files.
        while waiting_tasks:
            prep_tasks, bad_tasks = task_job_mgr.prep_submit_task_jobs(
                suite, waiting_tasks, dry_run=True)
            for itask in prep_tasks + bad_tasks:
                waiting_tasks.remove(itask)
            if waiting_tasks:
                # Let the process pool make progress before polling again.
                task_job_mgr.proc_pool.process()
                sleep(1.0)

        for itask in itasks:
            if itask.local_job_file_path:
                print(('JOB SCRIPT=%s' % itask.local_job_file_path))
            else:
                print(('Unable to prepare job file for %s' % itask.identity),
                      file=sys.stderr)
                ret_code = 1
    else:
        # Real run: submit the jobs.
        while waiting_tasks:
            for itask in task_job_mgr.submit_task_jobs(suite, waiting_tasks):
                waiting_tasks.remove(itask)
            if waiting_tasks:
                # Let the process pool make progress before polling again.
                task_job_mgr.proc_pool.process()
                sleep(1.0)
        # Drain the commands still in the process pool.
        while task_job_mgr.proc_pool.is_not_done():
            task_job_mgr.proc_pool.process()
        for itask in itasks:
            if itask.summary.get('submit_method_id') is not None:
                print(('[%s] Job ID: %s' % (
                    itask.identity, itask.summary['submit_method_id'])))
            if itask.state(TASK_STATUS_SUBMIT_FAILED):
                ret_code = 1
    sys.exit(ret_code)
def _load_remote_item(self, item, reg, owner, host):
    """Load content of service item from remote [owner@]host via SSH.

    Returns the content, or None when the target is not remote, the SSH
    command cannot be started, the command fails, or nothing follows the
    prefix line in its output.
    """
    if not is_remote(host, owner):
        return
    if host is None:
        host = 'localhost'
    if owner is None:
        owner = get_user()
    if item == self.FILE_BASE_CONTACT and not is_remote_host(host):
        # Attempt to read suite contact file via the local filesystem.
        path = r'%(run_d)s/%(srv_base)s' % {
            'run_d': glbl_cfg().get_derived_host_item(
                reg, 'suite run directory', 'localhost', owner,
                replace_home=False),
            'srv_base': self.DIR_BASE_SRV,
        }
        content = self._load_local_item(item, path)
        if content is not None:
            return content
        # Else drop through and attempt via ssh to the suite account.
    # Prefix STDOUT to ensure returned content is relevant
    prefix = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
    # Attempt to cat passphrase file under suite service directory
    script = (
        r"""echo '%(prefix)s'; """
        r'''cat "%(run_d)s/%(srv_base)s/%(item)s"'''
    ) % {
        'prefix': prefix,
        'run_d': glbl_cfg().get_derived_host_item(
            reg, 'suite run directory', host, owner),
        'srv_base': self.DIR_BASE_SRV,
        'item': item
    }
    import shlex
    command = shlex.split(
        glbl_cfg().get_host_item('ssh command', host, owner))
    command += ['-n', owner + '@' + host, script]
    from subprocess import Popen, PIPE, DEVNULL
    try:
        # DEVNULL rather than open(os.devnull): no file object is left
        # open in this process after the call.
        proc = Popen(command, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
    except OSError:
        if cylc.flow.flags.debug:
            import traceback
            traceback.print_exc()
        return
    out, err = (f.decode() for f in proc.communicate())
    ret_code = proc.wait()
    # Extract passphrase from STDOUT
    # It should live in the line with the correct prefix
    content = ""
    can_read = False
    for line in out.splitlines(True):
        if can_read:
            content += line
        elif line.strip() == prefix:
            can_read = True
    if not content or ret_code:
        LOG.debug(
            '$ %(command)s  # code=%(ret_code)s\n%(err)s',
            {
                'command': command,
                # STDOUT may contain passphrase, so not safe to print
                # 'out': out,
                'err': err,
                'ret_code': ret_code,
            })
        return
    return content
def test_get_user(self):
    """get_user() should agree with the $USER environment variable."""
    expected = os.getenv('USER')
    actual = get_user()
    self.assertEqual(expected, actual)
def _setup_custom_event_handlers(self, itask, event, message):
    """Set up custom task event handlers.

    Handlers come from the "<event> handler" setting or, when that is
    unset and the event is listed in 'handler events', from 'handlers'.
    Each handler is either a command template filled from the task and
    suite data, or (when nothing is substituted) a command called with the
    classic four arguments: event, suite, task identity and message.  One
    timer is registered per handler; a handler whose template references
    an unknown key is logged and skipped.
    """
    handlers = self._get_events_conf(itask, event + ' handler')
    if (handlers is None and
            event in self._get_events_conf(itask, 'handler events', [])):
        handlers = self._get_events_conf(itask, 'handlers')
    if handlers is None:
        return
    retry_delays = self._get_events_conf(
        itask,
        'handler retry delays',
        self.get_host_conf(itask, "task event handler retry delays"))
    if not retry_delays:
        retry_delays = [0]
    # There can be multiple custom event handlers
    for i, handler in enumerate(handlers):
        if event in self.NON_UNIQUE_EVENTS:
            key1 = (
                '%s-%02d' % (self.HANDLER_CUSTOM, i),
                '%s-%d' % (event, itask.non_unique_events.get(event, 1)))
        else:
            key1 = ('%s-%02d' % (self.HANDLER_CUSTOM, i), event)
        id_key = (
            key1, str(itask.point), itask.tdef.name, itask.submit_num)
        if id_key in self.event_timers:
            continue
        # Note: user@host may not always be set for a submit number, e.g.
        # on late event or if host select command fails. Use null string to
        # prevent issues in this case.
        user_at_host = itask.summary['job_hosts'].get(itask.submit_num, '')
        if user_at_host and '@' not in user_at_host:
            # (only has 'user@' on the front if user is not suite owner).
            user_at_host = '%s@%s' % (get_user(), user_at_host)
        # Custom event handler can be a command template string
        # or a command that takes 4 arguments (classic interface)
        # Note quote() fails on None, need str(None).
        try:
            handler_data = {
                "event": quote(event),
                "suite": quote(self.suite),
                'suite_uuid': quote(str(self.uuid_str)),
                "point": quote(str(itask.point)),
                "name": quote(itask.tdef.name),
                "submit_num": itask.submit_num,
                "try_num": itask.get_try_num(),
                "id": quote(itask.identity),
                "message": quote(message),
                "batch_sys_name": quote(
                    str(itask.summary['batch_sys_name'])),
                "batch_sys_job_id": quote(
                    str(itask.summary['submit_method_id'])),
                "submit_time": quote(
                    str(itask.summary['submitted_time_string'])),
                "start_time": quote(
                    str(itask.summary['started_time_string'])),
                "finish_time": quote(
                    str(itask.summary['finished_time_string'])),
                "user@host": quote(user_at_host)
            }

            if self.suite_cfg:
                for key, value in self.suite_cfg.items():
                    if key == "URL":
                        handler_data["suite_url"] = quote(value)
                    else:
                        handler_data["suite_" + key] = quote(value)

            if itask.tdef.rtconfig['meta']:
                for key, value in itask.tdef.rtconfig['meta'].items():
                    if key == "URL":
                        handler_data["task_url"] = quote(value)
                    handler_data[key] = quote(value)

            cmd = handler % (handler_data)
        except KeyError as exc:
            # Use a separate name: rebinding ``message`` here would leak
            # the error text into the handlers processed after this one.
            error_text = "%s/%s/%02d %s bad template: %s" % (
                itask.point, itask.tdef.name, itask.submit_num, key1, exc)
            LOG.error(error_text)
            continue

        if cmd == handler:
            # Nothing substituted, assume classic interface
            cmd = "%s '%s' '%s' '%s' '%s'" % (
                handler, event, self.suite, itask.identity, message)
        LOG.debug("[%s] -Queueing %s handler: %s", itask, event, cmd)
        self.event_timers[id_key] = (
            TaskActionTimer(
                CustomTaskEventHandlerContext(
                    key1,
                    self.HANDLER_CUSTOM,
                    cmd,
                ),
                retry_delays))