示例#1
0
    def _submit_task_job_callback(self, suite, itask, cmd_ctx, line):
        """Helper for _submit_task_jobs_callback, on one task job."""
        ctx = SubProcContext(self.JOBS_SUBMIT, None)
        ctx.out = line
        items = line.split("|")
        try:
            ctx.timestamp, _, ctx.ret_code = items[0:3]
        except ValueError:
            ctx.ret_code = 1
            ctx.cmd = cmd_ctx.cmd  # print original command on failure
        else:
            ctx.ret_code = int(ctx.ret_code)
            if ctx.ret_code:
                ctx.cmd = cmd_ctx.cmd  # print original command on failure
        log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)

        if ctx.ret_code == SubProcPool.RET_CODE_SUITE_STOPPING:
            return

        job_d = get_task_job_id(itask.point, itask.tdef.name, itask.submit_num)
        try:
            itask.summary['submit_method_id'] = items[3]
            self.job_pool.set_job_attr(job_d, 'batch_sys_job_id', items[3])
        except IndexError:
            itask.summary['submit_method_id'] = None
        if itask.summary['submit_method_id'] == "None":
            itask.summary['submit_method_id'] = None
        if itask.summary['submit_method_id'] and ctx.ret_code == 0:
            self.task_events_mgr.process_message(itask, INFO,
                                                 TASK_OUTPUT_SUBMITTED,
                                                 ctx.timestamp)
        else:
            self.task_events_mgr.process_message(
                itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED,
                ctx.timestamp)
示例#2
0
 def _kill_task_job_callback(self, suite, itask, cmd_ctx, line):
     """Helper for _kill_task_jobs_callback, on one task job."""
     ctx = SubProcContext(self.JOBS_KILL, None)
     ctx.out = line
     try:
         ctx.timestamp, _, ctx.ret_code = line.split("|", 2)
     except ValueError:
         ctx.ret_code = 1
         ctx.cmd = cmd_ctx.cmd  # print original command on failure
     else:
         ctx.ret_code = int(ctx.ret_code)
         if ctx.ret_code:
             ctx.cmd = cmd_ctx.cmd  # print original command on failure
     log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)
     log_lvl = INFO
     log_msg = 'killed'
     if ctx.ret_code:  # non-zero exit status
         log_lvl = WARNING
         log_msg = 'kill failed'
         itask.state.kill_failed = True
     elif itask.state.status == TASK_STATUS_SUBMITTED:
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED,
             ctx.timestamp)
     elif itask.state.status == TASK_STATUS_RUNNING:
         self.task_events_mgr.process_message(itask, CRITICAL,
                                              TASK_OUTPUT_FAILED)
     else:
         log_lvl = DEBUG
         log_msg = ('ignoring job kill result, unexpected task state: %s' %
                    itask.state.status)
     itask.set_summary_message(log_msg)
     LOG.log(
         log_lvl,
         "[%s] -job(%02d) %s" % (itask.identity, itask.submit_num, log_msg))
示例#3
0
 def _event_email_callback(self, proc_ctx, schd_ctx):
     """Call back when email notification command exits."""
     for id_key in proc_ctx.cmd_kwargs["id_keys"]:
         key1, point, name, submit_num = id_key
         try:
             if proc_ctx.ret_code == 0:
                 del self.event_timers[id_key]
                 log_ctx = SubProcContext((key1, submit_num), None)
                 log_ctx.ret_code = 0
                 log_task_job_activity(log_ctx, schd_ctx.suite, point, name,
                                       submit_num)
             else:
                 self.event_timers[id_key].unset_waiting()
         except KeyError as exc:
             LOG.exception(exc)
示例#4
0
文件: task_job_mgr.py 项目: cylc/cylc
 def _poll_task_job_message_callback(self, suite, itask, cmd_ctx, line):
     """Helper for _poll_task_jobs_callback, on message of one task job."""
     ctx = SubProcContext(self.JOBS_POLL, None)
     ctx.out = line
     try:
         event_time, severity, message = line.split("|")[2:5]
     except ValueError:
         ctx.ret_code = 1
         ctx.cmd = cmd_ctx.cmd  # print original command on failure
     else:
         ctx.ret_code = 0
         self.task_events_mgr.process_message(
             itask, severity, message, event_time,
             self.task_events_mgr.FLAG_POLLED)
     log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)
示例#5
0
 def test_run_command_with_stdin_from_unicode(self):
     """Test STDIN from string with Unicode"""
     ctx = SubProcContext('meow', ['cat'], stdin_str='喵\n')
     SubProcPool.run_command(ctx)
     self.assertEqual(ctx.err, '')
     self.assertEqual(ctx.out, '喵\n')
     self.assertEqual(ctx.ret_code, 0)
 def _run_event_custom_handlers(self, config, ctx):
     """Helper for "run_event_handlers", custom event handlers."""
     # Look for event handlers
     # 1. Handlers for specific event
     # 2. General handlers
     handlers = self.get_events_conf(config, '%s handlers' % ctx.event)
     if not handlers and (ctx.event in self.get_events_conf(
             config, 'handler events', [])):
         handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for i, handler in enumerate(handlers):
         cmd_key = ('%s-%02d' % (self.WORKFLOW_EVENT_HANDLER, i), ctx.event)
         # Handler command may be a string for substitution
         # BACK COMPAT: suite, suite_uuid are deprecated
         # url:
         #     https://github.com/cylc/cylc-flow/pull/4174
         # from:
         #     Cylc 8
         # remove at:
         #     Cylc 9
         try:
             handler_data = {
                 'event': quote(ctx.event),
                 'message': quote(ctx.reason),
                 'workflow': quote(ctx.workflow),
                 'workflow_uuid': quote(ctx.uuid_str),
                 'suite': quote(ctx.workflow),  # deprecated
                 'suite_uuid': quote(ctx.uuid_str),  # deprecated
             }
             if config.cfg['meta']:
                 for key, value in config.cfg['meta'].items():
                     if key == "URL":
                         handler_data["workflow_url"] = quote(value)
                         handler_data["suite_url"] = quote(value)
                     handler_data[key] = quote(value)
             cmd = handler % (handler_data)
         except KeyError as exc:
             message = "%s bad template: %s" % (cmd_key, exc)
             LOG.error(message)
             continue
         if cmd == handler:
             # Nothing substituted, assume classic interface
             cmd = "%s '%s' '%s' '%s'" % (handler, ctx.event, ctx.workflow,
                                          ctx.reason)
         proc_ctx = SubProcContext(
             cmd_key,
             cmd,
             env=dict(os.environ),
             shell=True  # nosec (designed to run user defined code)
         )
         if self.proc_pool.closed:
             # Run command in foreground if abort on failure is set or if
             # process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(proc_ctx)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(
                 proc_ctx, callback=self._run_event_handlers_callback)
示例#7
0
    def _process_job_logs_retrieval(self, schd_ctx, ctx, id_keys):
        """Process retrieval of task job logs from remote user@host."""
        if ctx.user_at_host and "@" in ctx.user_at_host:
            s_user, s_host = ctx.user_at_host.split("@", 1)
        else:
            s_user, s_host = (None, ctx.user_at_host)
        ssh_str = str(glbl_cfg().get_host_item("ssh command", s_host, s_user))
        rsync_str = str(glbl_cfg().get_host_item("retrieve job logs command",
                                                 s_host, s_user))

        cmd = shlex.split(rsync_str) + ["--rsh=" + ssh_str]
        if LOG.isEnabledFor(DEBUG):
            cmd.append("-v")
        if ctx.max_size:
            cmd.append("--max-size=%s" % (ctx.max_size, ))
        # Includes and excludes
        includes = set()
        for _, point, name, submit_num in id_keys:
            # Include relevant directories, all levels needed
            includes.add("/%s" % (point))
            includes.add("/%s/%s" % (point, name))
            includes.add("/%s/%s/%02d" % (point, name, submit_num))
            includes.add("/%s/%s/%02d/**" % (point, name, submit_num))
        cmd += ["--include=%s" % (include) for include in sorted(includes)]
        cmd.append("--exclude=/**")  # exclude everything else
        # Remote source
        cmd.append(
            "%s:%s/" %
            (ctx.user_at_host,
             get_remote_suite_run_job_dir(s_host, s_user, schd_ctx.suite)))
        # Local target
        cmd.append(get_suite_run_job_dir(schd_ctx.suite) + "/")
        self.proc_pool.put_command(
            SubProcContext(ctx, cmd, env=dict(os.environ), id_keys=id_keys),
            self._job_logs_retrieval_callback, [schd_ctx])
示例#8
0
    def _run_job_cmd(self, cmd_key, suite, itasks, callback):
        """Run job commands, e.g. poll, kill, etc.

        Group itasks with their user@host.
        Put a job command for each user@host to the multiprocess pool.

        """
        if not itasks:
            return
        auth_itasks = {}
        for itask in itasks:
            if (itask.task_host, itask.task_owner) not in auth_itasks:
                auth_itasks[(itask.task_host, itask.task_owner)] = []
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        for (host, owner), itasks in sorted(auth_itasks.items()):
            cmd = ["cylc", cmd_key]
            if LOG.isEnabledFor(DEBUG):
                cmd.append("--debug")
            if is_remote_host(host):
                cmd.append("--host=%s" % (host))
            if is_remote_user(owner):
                cmd.append("--user=%s" % (owner))
            cmd.append("--")
            cmd.append(get_remote_suite_run_job_dir(host, owner, suite))
            job_log_dirs = []
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                job_log_dirs.append(
                    get_task_job_id(itask.point, itask.tdef.name,
                                    itask.submit_num))
            cmd += job_log_dirs
            self.proc_pool.put_command(SubProcContext(cmd_key, cmd), callback,
                                       [suite, itasks])
示例#9
0
 def _prep_submit_task_job_error(self, suite, itask, dry_run, action, exc):
     """Helper for self._prep_submit_task_job. On error."""
     LOG.debug("submit_num %s" % itask.submit_num)
     LOG.debug(traceback.format_exc())
     LOG.error(exc)
     log_task_job_activity(SubProcContext(self.JOBS_SUBMIT,
                                          action,
                                          err=exc,
                                          ret_code=1),
                           suite,
                           itask.point,
                           itask.tdef.name,
                           submit_num=itask.submit_num)
     if not dry_run:
         # Persist
         self.suite_db_mgr.put_insert_task_jobs(
             itask, {
                 'is_manual_submit': itask.is_manual_submit,
                 'try_num': itask.get_try_num(),
                 'time_submit': get_current_time_string(),
                 'batch_sys_name': itask.summary.get('batch_sys_name'),
             })
         itask.is_manual_submit = False
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
示例#10
0
 def test_run_command_returns_0(self):
     """Test basic usage, command returns 0"""
     ctx = SubProcContext('truth', ['true'])
     SubProcPool.run_command(ctx)
     self.assertEqual(ctx.err, '')
     self.assertEqual(ctx.out, '')
     self.assertEqual(ctx.ret_code, 0)
示例#11
0
 def test_run_command_writes_to_out(self):
     """Test basic usage, command writes to STDOUT"""
     ctx = SubProcContext('parrot', ['echo', 'pirate', 'urrrr'])
     SubProcPool.run_command(ctx)
     self.assertEqual(ctx.err, '')
     self.assertEqual(ctx.out, 'pirate urrrr\n')
     self.assertEqual(ctx.ret_code, 0)
示例#12
0
 def test_run_command_returns_1(self):
     """Test basic usage, command returns 1"""
     ctx = SubProcContext('lies', ['false'])
     SubProcPool.run_command(ctx)
     self.assertEqual(ctx.err, '')
     self.assertEqual(ctx.out, '')
     self.assertEqual(ctx.ret_code, 1)
示例#13
0
 def test_run_command_writes_to_err(self):
     """Test basic usage, command writes to STDERR"""
     ctx = SubProcContext('parrot2',
                          ['bash', '-c', 'echo pirate errrr >&2'])
     SubProcPool.run_command(ctx)
     self.assertEqual(ctx.err, 'pirate errrr\n')
     self.assertEqual(ctx.out, '')
     self.assertEqual(ctx.ret_code, 0)
示例#14
0
文件: task_job_mgr.py 项目: cylc/cylc
 def _kill_task_job_callback(self, suite, itask, cmd_ctx, line):
     """Helper for _kill_task_jobs_callback, on one task job."""
     ctx = SubProcContext(self.JOBS_KILL, None)
     ctx.out = line
     try:
         ctx.timestamp, _, ctx.ret_code = line.split("|", 2)
     except ValueError:
         ctx.ret_code = 1
         ctx.cmd = cmd_ctx.cmd  # print original command on failure
     else:
         ctx.ret_code = int(ctx.ret_code)
         if ctx.ret_code:
             ctx.cmd = cmd_ctx.cmd  # print original command on failure
     log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)
     log_lvl = INFO
     log_msg = 'killed'
     if ctx.ret_code:  # non-zero exit status
         log_lvl = WARNING
         log_msg = 'kill failed'
         itask.state.kill_failed = True
     elif itask.state.status == TASK_STATUS_SUBMITTED:
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED,
             ctx.timestamp)
     elif itask.state.status == TASK_STATUS_RUNNING:
         self.task_events_mgr.process_message(
             itask, CRITICAL, TASK_OUTPUT_FAILED)
     else:
         log_lvl = DEBUG
         log_msg = (
             'ignoring job kill result, unexpected task state: %s' %
             itask.state.status)
     itask.set_summary_message(log_msg)
     LOG.log(log_lvl, "[%s] -job(%02d) %s" % (
         itask.identity, itask.submit_num, log_msg))
示例#15
0
文件: task_job_mgr.py 项目: cylc/cylc
    def _submit_task_job_callback(self, suite, itask, cmd_ctx, line):
        """Helper for _submit_task_jobs_callback, on one task job."""
        ctx = SubProcContext(self.JOBS_SUBMIT, None)
        ctx.out = line
        items = line.split("|")
        try:
            ctx.timestamp, _, ctx.ret_code = items[0:3]
        except ValueError:
            ctx.ret_code = 1
            ctx.cmd = cmd_ctx.cmd  # print original command on failure
        else:
            ctx.ret_code = int(ctx.ret_code)
            if ctx.ret_code:
                ctx.cmd = cmd_ctx.cmd  # print original command on failure
        log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)

        if ctx.ret_code == SubProcPool.RET_CODE_SUITE_STOPPING:
            return

        try:
            itask.summary['submit_method_id'] = items[3]
        except IndexError:
            itask.summary['submit_method_id'] = None
        if itask.summary['submit_method_id'] == "None":
            itask.summary['submit_method_id'] = None
        if itask.summary['submit_method_id'] and ctx.ret_code == 0:
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_SUBMITTED, ctx.timestamp)
        else:
            self.task_events_mgr.process_message(
                itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED,
                ctx.timestamp)
示例#16
0
 def _process_event_email(self, schd_ctx, ctx, id_keys):
     """Process event notification, by email."""
     if len(id_keys) == 1:
         # 1 event from 1 task
         (_, event), point, name, submit_num = id_keys[0]
         subject = "[%s/%s/%02d %s] %s" % (point, name, submit_num, event,
                                           schd_ctx.suite)
     else:
         event_set = set(id_key[0][1] for id_key in id_keys)
         if len(event_set) == 1:
             # 1 event from n tasks
             subject = "[%d tasks %s] %s" % (len(id_keys), event_set.pop(),
                                             schd_ctx.suite)
         else:
             # n events from n tasks
             subject = "[%d task events] %s" % (len(id_keys),
                                                schd_ctx.suite)
     cmd = ["mail", "-s", subject]
     # From: and To:
     cmd.append("-r")
     cmd.append(ctx.mail_from)
     cmd.append(ctx.mail_to)
     # STDIN for mail, tasks
     stdin_str = ""
     for id_key in sorted(id_keys):
         (_, event), point, name, submit_num = id_key
         stdin_str += "%s: %s/%s/%02d\n" % (event, point, name, submit_num)
     # STDIN for mail, event info + suite detail
     stdin_str += "\n"
     for label, value in [('suite', schd_ctx.suite),
                          ("host", schd_ctx.host), ("port", schd_ctx.port),
                          ("owner", schd_ctx.owner)]:
         if value:
             stdin_str += "%s: %s\n" % (label, value)
     if self.mail_footer:
         stdin_str += (self.mail_footer + "\n") % {
             "host": schd_ctx.host,
             "port": schd_ctx.port,
             "owner": schd_ctx.owner,
             "suite": schd_ctx.suite
         }
     # SMTP server
     env = dict(os.environ)
     mail_smtp = ctx.mail_smtp
     if mail_smtp:
         env["smtp"] = mail_smtp
     self.proc_pool.put_command(
         SubProcContext(
             ctx,
             cmd,
             env=env,
             stdin_str=stdin_str,
             id_keys=id_keys,
         ), self._event_email_callback, [schd_ctx])
示例#17
0
 def test_run_command_with_stdin_from_path(self):
     """Test STDIN from a single file path"""
     handle = NamedTemporaryFile()
     handle.write('catches mice.\n'.encode('UTF-8'))
     handle.seek(0)
     ctx = SubProcContext('meow', ['cat'], stdin_files=[handle.name])
     SubProcPool.run_command(ctx)
     self.assertEqual(ctx.err, '')
     self.assertEqual(ctx.out, 'catches mice.\n')
     self.assertEqual(ctx.ret_code, 0)
     handle.close()
示例#18
0
    def remote_init(self, platform: Dict[str, Any],
                    curve_auth: 'ThreadAuthenticator',
                    client_pub_key_dir: str) -> None:
        """Initialise a remote host if necessary.

        Call "cylc remote-init" to install workflow items to remote:
            ".service/contact": For TCP task communication
            "python/": if source exists

        Args:
            platform: A dict containing settings relating to platform used in
                this remote installation.
            curve_auth: The ZMQ authenticator.
            client_pub_key_dir: Client public key directory, used by the
                ZMQ authenticator.

        """
        install_target = platform['install target']
        if install_target == get_localhost_install_target():
            self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_DONE
            return
        # Set status of install target to in progress while waiting for remote
        # initialisation to finish
        self.remote_init_map[install_target] = REMOTE_INIT_IN_PROGRESS

        # Determine what items to install
        comms_meth: CommsMeth = CommsMeth(platform['communication method'])
        items = self._remote_init_items(comms_meth)

        # Create a TAR archive with the service files,
        # so they can be sent later via SSH's STDIN to the task remote.
        tmphandle = self.proc_pool.get_temporary_file()
        tarhandle = tarfile.open(fileobj=tmphandle, mode='w')
        for path, arcname in items:
            tarhandle.add(path, arcname=arcname)
        tarhandle.close()
        tmphandle.seek(0)
        # Build the remote-init command to be run over ssh
        cmd = ['remote-init']
        if cylc.flow.flags.verbosity > 1:
            cmd.append('--debug')
        cmd.append(str(install_target))
        cmd.append(get_remote_workflow_run_dir(self.workflow))
        dirs_to_symlink = get_dirs_to_symlink(install_target, self.workflow)
        for key, value in dirs_to_symlink.items():
            if value is not None:
                cmd.append(f"{key}={quote(value)} ")
        # Create the ssh command
        cmd = construct_ssh_cmd(cmd, platform)
        self.proc_pool.put_command(
            SubProcContext('remote-init', cmd, stdin_files=[tmphandle]),
            self._remote_init_callback,
            [platform, tmphandle, curve_auth, client_pub_key_dir])
示例#19
0
 def _run_event_custom_handlers(self, config, ctx):
     """Helper for "run_event_handlers", custom event handlers."""
     # Look for event handlers
     # 1. Handlers for specific event
     # 2. General handlers
     handlers = self.get_events_conf(config, '%s handler' % ctx.event)
     if not handlers and (ctx.event in self.get_events_conf(
             config, 'handler events', [])):
         handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for i, handler in enumerate(handlers):
         cmd_key = ('%s-%02d' % (self.SUITE_EVENT_HANDLER, i), ctx.event)
         # Handler command may be a string for substitution
         abort_on_error = self.get_events_conf(
             config, 'abort if %s handler fails' % ctx.event)
         try:
             handler_data = {
                 'event': quote(ctx.event),
                 'message': quote(ctx.reason),
                 'suite': quote(ctx.suite),
                 'suite_uuid': quote(str(ctx.uuid_str)),
             }
             if config.cfg['meta']:
                 for key, value in config.cfg['meta'].items():
                     if key == "URL":
                         handler_data["suite_url"] = quote(value)
                     handler_data[key] = quote(value)
             cmd = handler % (handler_data)
         except KeyError as exc:
             message = "%s bad template: %s" % (cmd_key, exc)
             LOG.error(message)
             if abort_on_error:
                 raise SuiteEventError(message)
             continue
         if cmd == handler:
             # Nothing substituted, assume classic interface
             cmd = "%s '%s' '%s' '%s'" % (handler, ctx.event, ctx.suite,
                                          ctx.reason)
         proc_ctx = SubProcContext(cmd_key,
                                   cmd,
                                   env=dict(os.environ),
                                   shell=True)
         if abort_on_error or self.proc_pool.closed:
             # Run command in foreground if abort on failure is set or if
             # process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(
                 proc_ctx, abort_on_error=abort_on_error)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(proc_ctx,
                                        self._run_event_handlers_callback)
示例#20
0
    def test_log_debug_on_noerror_exit_code(self, cylc_log):
        """Test that a debug log is emitted when the log retrieval command
        exited with an non-error code (i.e. 0).

        :param cylc_log: mocked cylc logger
        :type cylc_log: mock.MagicMock
        """
        task_events_manager = TaskEventsManager(None, None, None, None, None)
        proc_ctx = SubProcContext(cmd_key=None, cmd="ls /tmp/123", ret_code=0,
                                  err="", id_keys=[])
        task_events_manager._job_logs_retrieval_callback(proc_ctx, None)
        self.assertEqual(1, cylc_log.debug.call_count)
        self.assertTrue(cylc_log.debug.call_args.contains("ls /tmp/123"))
示例#21
0
    def test_log_error_on_error_exit_code(self, cylc_log):
        """Test that an error log is emitted when the log retrieval command
        exited with a code different than zero.

        :param cylc_log: mocked cylc logger
        :type cylc_log: mock.MagicMock
        """
        task_events_manager = TaskEventsManager(None, None, None, None, None)
        proc_ctx = SubProcContext(cmd_key=None, cmd="error", ret_code=1,
                                  err="Error!", id_keys=[])
        task_events_manager._job_logs_retrieval_callback(proc_ctx, None)
        self.assertEqual(1, cylc_log.error.call_count)
        self.assertTrue(cylc_log.error.call_args.contains("Error!"))
示例#22
0
    def remote_host_select(self, host_str):
        """Evaluate a task host string.

        Arguments:
            host_str (str):
                An explicit host name, a command in back-tick or $(command)
                format, or an environment variable holding a hostname.

        Return (str):
            None if evaluate of host_str is still taking place.
            'localhost' if host_str is not defined or if the evaluated host
            name is equivalent to 'localhost'.
            Otherwise, return the evaluated host name on success.

        Raise TaskRemoteMgmtError on error.

        """
        if not host_str:
            return 'localhost'

        # Host selection command: $(command) or `command`
        match = REC_COMMAND.match(host_str)
        if match:
            cmd_str = match.groups()[1]
            if cmd_str in self.remote_host_str_map:
                # Command recently launched
                value = self.remote_host_str_map[cmd_str]
                if isinstance(value, TaskRemoteMgmtError):
                    raise value  # command failed
                elif value is None:
                    return  # command not yet ready
                else:
                    host_str = value  # command succeeded
            else:
                # Command not launched (or already reset)
                self.proc_pool.put_command(
                    SubProcContext('remote-host-select',
                                   ['bash', '-c', cmd_str],
                                   env=dict(os.environ)),
                    self._remote_host_select_callback, [cmd_str])
                self.remote_host_str_map[cmd_str] = None
                return self.remote_host_str_map[cmd_str]

        # Environment variable substitution
        host_str = os.path.expandvars(host_str)
        # Remote?
        if is_remote_host(host_str):
            return host_str
        else:
            return 'localhost'
示例#23
0
 def _job_logs_retrieval_callback(self, proc_ctx, schd_ctx):
     """Call back when log job retrieval completes."""
     if proc_ctx.ret_code:
         LOG.error(proc_ctx)
     else:
         LOG.debug(proc_ctx)
     for id_key in proc_ctx.cmd_kwargs["id_keys"]:
         key1, point, name, submit_num = id_key
         try:
             # All completed jobs are expected to have a "job.out".
             fnames = [JOB_LOG_OUT]
             try:
                 if key1[1] not in 'succeeded':
                     fnames.append(JOB_LOG_ERR)
             except TypeError:
                 pass
             fname_oks = {}
             for fname in fnames:
                 fname_oks[fname] = os.path.exists(
                     get_task_job_log(schd_ctx.suite, point, name,
                                      submit_num, fname))
             # All expected paths must exist to record a good attempt
             log_ctx = SubProcContext((key1, submit_num), None)
             if all(fname_oks.values()):
                 log_ctx.ret_code = 0
                 del self.event_timers[id_key]
             else:
                 log_ctx.ret_code = 1
                 log_ctx.err = "File(s) not retrieved:"
                 for fname, exist_ok in sorted(fname_oks.items()):
                     if not exist_ok:
                         log_ctx.err += " %s" % fname
                 self.event_timers[id_key].unset_waiting()
             log_task_job_activity(log_ctx, schd_ctx.suite, point, name,
                                   submit_num)
         except KeyError as exc:
             LOG.exception(exc)
示例#24
0
 def test_run_command_with_stdin_from_handles(self):
     """Test STDIN from multiple file handles"""
     handles = []
     for txt in ['catches mice.\n', 'eat fish.\n']:
         handle = TemporaryFile()
         handle.write(txt.encode('UTF-8'))
         handle.seek(0)
         handles.append(handle)
     ctx = SubProcContext('meow', ['cat'], stdin_files=handles)
     SubProcPool.run_command(ctx)
     self.assertEqual(ctx.err, '')
     self.assertEqual(ctx.out, 'catches mice.\neat fish.\n')
     self.assertEqual(ctx.ret_code, 0)
     for handle in handles:
         handle.close()
示例#25
0
 def _run_event_mail(self, config, ctx):
     """Helper for "run_event_handlers", do mail notification."""
     if ctx.event in self.get_events_conf(config, 'mail events', []):
         # SMTP server
         env = dict(os.environ)
         mail_smtp = self.get_events_conf(config, 'mail smtp')
         if mail_smtp:
             env['smtp'] = mail_smtp
         subject = '[suite %(event)s] %(suite)s' % {
             'suite': ctx.suite, 'event': ctx.event}
         stdin_str = ''
         for name, value in [
                 ('suite event', ctx.event),
                 ('reason', ctx.reason),
                 ('suite', ctx.suite),
                 ('host', ctx.host),
                 ('port', ctx.port),
                 ('owner', ctx.owner)]:
             if value:
                 stdin_str += '%s: %s\n' % (name, value)
         mail_footer_tmpl = self.get_events_conf(config, 'mail footer')
         if mail_footer_tmpl:
             stdin_str += (mail_footer_tmpl + '\n') % {
                 'host': ctx.host,
                 'port': ctx.port,
                 'owner': ctx.owner,
                 'suite': ctx.suite}
         proc_ctx = SubProcContext(
             (self.SUITE_EVENT_HANDLER, ctx.event),
             [
                 'mail',
                 '-s', subject,
                 '-r', self.get_events_conf(
                     config,
                     'mail from', 'notifications@' + get_host()),
                 self.get_events_conf(config, 'mail to', get_user()),
             ],
             env=env,
             stdin_str=stdin_str)
         if self.proc_pool.closed:
             # Run command in foreground if process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(proc_ctx)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(
                 proc_ctx, self._run_event_mail_callback)
示例#26
0
 def _run_event_custom_handlers(self, schd, template_variables, event):
     """Helper for "run_event_handlers", custom event handlers."""
     # Look for event handlers
     # 1. Handlers for specific event
     # 2. General handlers
     config = schd.config
     handlers = self.get_events_conf(config, '%s handlers' % event)
     if not handlers and (
         event in
         self.get_events_conf(config, 'handler events', [])
     ):
         handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for i, handler in enumerate(handlers):
         cmd_key = ('%s-%02d' % (self.WORKFLOW_EVENT_HANDLER, i), event)
         try:
             cmd = handler % (template_variables)
         except KeyError as exc:
             message = f'{cmd_key} bad template: {handler}\n{exc}'
             LOG.error(message)
             continue
         if cmd == handler:
             # Nothing substituted, assume classic interface
             cmd = (
                 f"%(handler)s"
                 f" '%({EventData.Event.value})s'"
                 f" '%({EventData.Workflow.value})s'"
                 f" '%({EventData.Message.value})s'"
             ) % (
                 {'handler': handler, **template_variables}
             )
         proc_ctx = SubProcContext(
             cmd_key,
             cmd,
             env=dict(os.environ),
             shell=True  # nosec (designed to run user defined code)
         )
         if self.proc_pool.closed:
             # Run command in foreground if abort on failure is set or if
             # process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(proc_ctx)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(
                 proc_ctx, callback=self._run_event_handlers_callback)
示例#27
0
    def file_install(self, platform):
        """Install required files on the remote install target.

        Included by default in the file installation:
            Files:
                .service/server.key  (required for ZMQ authentication)
            Directories:
                app/
                bin/
                etc/
                lib/
        """
        install_target = platform['install target']
        self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_IN_PROGRESS
        src_path = get_workflow_run_dir(self.workflow)
        dst_path = get_remote_workflow_run_dir(self.workflow)
        install_target = platform['install target']
        try:
            cmd, host = construct_rsync_over_ssh_cmd(src_path,
                                                     dst_path,
                                                     platform,
                                                     self.rsync_includes,
                                                     bad_hosts=self.bad_hosts)
            ctx = SubProcContext('file-install', cmd, host)
        except NoHostsError as exc:
            LOG.error(
                PlatformError(
                    f'{PlatformError.MSG_INIT}\n{exc}',
                    platform['name'],
                ))
            self.remote_init_map[
                platform['install target']] = REMOTE_FILE_INSTALL_FAILED
            self.bad_hosts -= set(platform['hosts'])
            self.ready = True
        else:
            log_platform_event('file install', platform, host)
            self.proc_pool.put_command(
                ctx,
                bad_hosts=self.bad_hosts,
                callback=self._file_install_callback,
                callback_args=[platform, install_target],
                callback_255=self._file_install_callback_255,
            )
示例#28
0
 def _poll_task_job_message_callback(self, suite, itask, cmd_ctx, line):
     """Helper for _poll_task_jobs_callback, on message of one task job."""
     ctx = SubProcContext(self.JOBS_POLL, None)
     ctx.out = line
     try:
         event_time, severity, message = line.split("|")[2:5]
     except ValueError:
         ctx.ret_code = 1
         ctx.cmd = cmd_ctx.cmd  # print original command on failure
     else:
         ctx.ret_code = 0
         self.task_events_mgr.process_message(
             itask, severity, message, event_time,
             self.task_events_mgr.FLAG_POLLED)
     log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)
示例#29
0
 def _send_mail(self, event, subject, message, schd, env):
     proc_ctx = SubProcContext(
         (self.WORKFLOW_EVENT_HANDLER, event),
         [
             'mail',
             '-s', subject,
             '-r', self.get_events_conf(
                 schd.config,
                 'from', 'notifications@' + get_host()),
             self.get_events_conf(schd.config, 'to', get_user()),
         ],
         env=env,
         stdin_str=message)
     if self.proc_pool.closed:
         # Run command in foreground if process pool is closed
         self.proc_pool.run_command(proc_ctx)
         self._run_event_handlers_callback(proc_ctx)
     else:
         # Run command using process pool otherwise
         self.proc_pool.put_command(
             proc_ctx, callback=self._run_event_mail_callback)
示例#30
0
    def _run_job_cmd(self, cmd_key, suite, itasks, callback):
        """Run job commands, e.g. poll, kill, etc.

        Group itasks with their platform_name and host.
        Put a job command for each group to the multiprocess pool.

        """
        if not itasks:
            return
        # sort itasks into lists based upon where they were run.
        auth_itasks = {}
        for itask in itasks:
            platform_n = itask.platform['name']
            if platform_n not in auth_itasks:
                auth_itasks[platform_n] = []
            auth_itasks[platform_n].append(itask)

        # Go through each list of itasks and carry out commands as required.
        for platform_n, itasks in sorted(auth_itasks.items()):
            platform = get_platform(platform_n)
            if is_remote_platform(platform):
                remote_mode = True
                cmd = [cmd_key]
            else:
                cmd = ["cylc", cmd_key]
                remote_mode = False
            if LOG.isEnabledFor(DEBUG):
                cmd.append("--debug")
            cmd.append("--")
            cmd.append(get_remote_suite_run_job_dir(platform, suite))
            job_log_dirs = []
            if remote_mode:
                cmd = construct_ssh_cmd(cmd, platform)
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                job_log_dirs.append(
                    get_task_job_id(itask.point, itask.tdef.name,
                                    itask.submit_num))
            cmd += job_log_dirs
            self.proc_pool.put_command(SubProcContext(cmd_key, cmd), callback,
                                       [suite, itasks])
示例#31
0
 def _prep_submit_task_job_error(self, workflow, itask, action, exc):
     """Helper for self._prep_submit_task_job. On error."""
     LOG.debug("submit_num %s" % itask.submit_num)
     log_task_job_activity(SubProcContext(self.JOBS_SUBMIT,
                                          action,
                                          err=exc,
                                          ret_code=1),
                           workflow,
                           itask.point,
                           itask.tdef.name,
                           submit_num=itask.submit_num)
     # Persist
     self.workflow_db_mgr.put_insert_task_jobs(
         itask, {
             'is_manual_submit': itask.is_manual_submit,
             'try_num': itask.get_try_num(),
             'time_submit': get_current_time_string(),
             'job_runner_name': itask.summary.get('job_runner_name'),
         })
     itask.is_manual_submit = False
     self.task_events_mgr.process_message(
         itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
示例#32
0
    def file_install(self, platform):
        """Install required files on the remote install target.

        Included by default in the file installation:
            Files:
                .service/server.key  (required for ZMQ authentication)
            Directories:
                app/
                bin/
                etc/
                lib/
        """
        install_target = platform['install target']
        self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_IN_PROGRESS
        src_path = get_workflow_run_dir(self.workflow)
        dst_path = get_remote_workflow_run_dir(self.workflow)
        install_target = platform['install target']
        ctx = SubProcContext(
            'file-install',
            construct_rsync_over_ssh_cmd(src_path, dst_path, platform,
                                         self.rsync_includes))
        LOG.debug(f"Begin file installation on {install_target}")
        self.proc_pool.put_command(ctx, self._file_install_callback,
                                   [install_target])
示例#33
0
文件: task_job_mgr.py 项目: cylc/cylc
    def _poll_task_job_callback(self, suite, itask, cmd_ctx, line):
        """Helper for _poll_task_jobs_callback, on one task job."""
        ctx = SubProcContext(self.JOBS_POLL, None)
        ctx.out = line
        ctx.ret_code = 0

        # See cylc.flow.batch_sys_manager.JobPollContext
        try:
            job_log_dir, context = line.split('|')[1:3]
            items = json.loads(context)
            jp_ctx = JobPollContext(job_log_dir, **items)
        except TypeError:
            itask.set_summary_message(self.POLL_FAIL)
            ctx.cmd = cmd_ctx.cmd  # print original command on failure
            return
        except ValueError:
            # back compat for cylc 7.7.1 and previous
            try:
                values = line.split('|')
                items = dict(  # done this way to ensure IndexError is raised
                    (key, values[x]) for
                    x, key in enumerate(JobPollContext.CONTEXT_ATTRIBUTES))
                job_log_dir = items.pop('job_log_dir')
            except (ValueError, IndexError):
                itask.set_summary_message(self.POLL_FAIL)
                ctx.cmd = cmd_ctx.cmd  # print original command on failure
                return
        finally:
            log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)

        flag = self.task_events_mgr.FLAG_POLLED
        if jp_ctx.run_status == 1 and jp_ctx.run_signal in ["ERR", "EXIT"]:
            # Failed normally
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_FAILED, jp_ctx.time_run_exit, flag)
        elif jp_ctx.run_status == 1 and jp_ctx.batch_sys_exit_polled == 1:
            # Failed by a signal, and no longer in batch system
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_FAILED, jp_ctx.time_run_exit, flag)
            self.task_events_mgr.process_message(
                itask, INFO, FAIL_MESSAGE_PREFIX + jp_ctx.run_signal,
                jp_ctx.time_run_exit,
                flag)
        elif jp_ctx.run_status == 1:
            # The job has terminated, but is still managed by batch system.
            # Some batch system may restart a job in this state, so don't
            # mark as failed yet.
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_STARTED, jp_ctx.time_run, flag)
        elif jp_ctx.run_status == 0:
            # The job succeeded
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_SUCCEEDED, jp_ctx.time_run_exit,
                flag)
        elif jp_ctx.time_run and jp_ctx.batch_sys_exit_polled == 1:
            # The job has terminated without executing the error trap
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_FAILED, get_current_time_string(),
                flag)
        elif jp_ctx.time_run:
            # The job has started, and is still managed by batch system
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_STARTED, jp_ctx.time_run, flag)
        elif jp_ctx.batch_sys_exit_polled == 1:
            # The job never ran, and no longer in batch system
            self.task_events_mgr.process_message(
                itask, INFO, self.task_events_mgr.EVENT_SUBMIT_FAILED,
                jp_ctx.time_submit_exit, flag)
        else:
            # The job never ran, and is in batch system
            self.task_events_mgr.process_message(
                itask, INFO, TASK_STATUS_SUBMITTED, jp_ctx.time_submit_exit,
                flag)
示例#34
0
    def _poll_task_job_callback(self, suite, itask, cmd_ctx, line):
        """Helper for _poll_task_jobs_callback, on one task job."""
        ctx = SubProcContext(self.JOBS_POLL, None)
        ctx.out = line
        ctx.ret_code = 0

        # See cylc.flow.batch_sys_manager.JobPollContext
        job_d = get_task_job_id(itask.point, itask.tdef.name, itask.submit_num)
        try:
            job_log_dir, context = line.split('|')[1:3]
            items = json.loads(context)
            jp_ctx = JobPollContext(job_log_dir, **items)
        except TypeError:
            itask.set_summary_message(self.POLL_FAIL)
            self.job_pool.add_job_msg(job_d, self.POLL_FAIL)
            ctx.cmd = cmd_ctx.cmd  # print original command on failure
            return
        except ValueError:
            # back compat for cylc 7.7.1 and previous
            try:
                values = line.split('|')
                items = dict(  # done this way to ensure IndexError is raised
                    (key, values[x])
                    for x, key in enumerate(JobPollContext.CONTEXT_ATTRIBUTES))
                job_log_dir = items.pop('job_log_dir')
            except (ValueError, IndexError):
                itask.set_summary_message(self.POLL_FAIL)
                self.job_pool.add_job_msg(job_d, self.POLL_FAIL)
                ctx.cmd = cmd_ctx.cmd  # print original command on failure
                return
        finally:
            log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)

        flag = self.task_events_mgr.FLAG_POLLED
        if jp_ctx.run_status == 1 and jp_ctx.run_signal in ["ERR", "EXIT"]:
            # Failed normally
            self.task_events_mgr.process_message(itask, INFO,
                                                 TASK_OUTPUT_FAILED,
                                                 jp_ctx.time_run_exit, flag)
        elif jp_ctx.run_status == 1 and jp_ctx.batch_sys_exit_polled == 1:
            # Failed by a signal, and no longer in batch system
            self.task_events_mgr.process_message(itask, INFO,
                                                 TASK_OUTPUT_FAILED,
                                                 jp_ctx.time_run_exit, flag)
            self.task_events_mgr.process_message(
                itask, INFO, FAIL_MESSAGE_PREFIX + jp_ctx.run_signal,
                jp_ctx.time_run_exit, flag)
        elif jp_ctx.run_status == 1:
            # The job has terminated, but is still managed by batch system.
            # Some batch system may restart a job in this state, so don't
            # mark as failed yet.
            self.task_events_mgr.process_message(itask, INFO,
                                                 TASK_OUTPUT_STARTED,
                                                 jp_ctx.time_run, flag)
        elif jp_ctx.run_status == 0:
            # The job succeeded
            self.task_events_mgr.process_message(itask, INFO,
                                                 TASK_OUTPUT_SUCCEEDED,
                                                 jp_ctx.time_run_exit, flag)
        elif jp_ctx.time_run and jp_ctx.batch_sys_exit_polled == 1:
            # The job has terminated without executing the error trap
            self.task_events_mgr.process_message(itask, INFO,
                                                 TASK_OUTPUT_FAILED,
                                                 get_current_time_string(),
                                                 flag)
        elif jp_ctx.time_run:
            # The job has started, and is still managed by batch system
            self.task_events_mgr.process_message(itask, INFO,
                                                 TASK_OUTPUT_STARTED,
                                                 jp_ctx.time_run, flag)
        elif jp_ctx.batch_sys_exit_polled == 1:
            # The job never ran, and no longer in batch system
            self.task_events_mgr.process_message(
                itask, INFO, self.task_events_mgr.EVENT_SUBMIT_FAILED,
                jp_ctx.time_submit_exit, flag)
        else:
            # The job never ran, and is in batch system
            self.task_events_mgr.process_message(itask, INFO,
                                                 TASK_STATUS_SUBMITTED,
                                                 jp_ctx.time_submit_exit, flag)