Exemplo n.º 1
0
 def _process_message_submit_failed(self, itask, event_time):
     """Helper for process_message, handle a submit-failed message.

     Record the failed submission in the task jobs table and clear the
     summary's submit method ID. Then, if no submission retry is lined up,
     reset the task to submit-failed; otherwise reset it to submit-retrying.
     Event handlers are set up when reset_state() returns a true value, and
     the job timers are reset in both cases.

     Args:
         itask: task whose job submission failed.
         event_time: time of the event; defaults to the value of
             get_current_time_string() when None.
     """
     LOG.error(self.EVENT_SUBMIT_FAILED, itask=itask)
     if event_time is None:
         event_time = get_current_time_string()
     # Record the time of the event itself rather than a fresh timestamp
     # taken when the message happens to be processed.
     self.suite_db_mgr.put_update_task_jobs(
         itask, {
             "time_submit_exit": event_time,
             "submit_status": 1,
         })
     itask.summary['submit_method_id'] = None
     if (TASK_STATUS_SUBMIT_RETRYING not in itask.try_timers or
             itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].next() is None):
         # No submission retry lined up: definitive failure.
         self.pflag = True
         # See github #476.
         if itask.state.reset_state(TASK_STATUS_SUBMIT_FAILED):
             self.setup_event_handlers(itask, self.EVENT_SUBMIT_FAILED,
                                       'job %s' % self.EVENT_SUBMIT_FAILED)
     else:
         # There is a submission retry lined up.
         timer = itask.try_timers[TASK_STATUS_SUBMIT_RETRYING]
         delay_msg = "submit-retrying in %s" % timer.delay_timeout_as_str()
         msg = "%s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg)
         LOG.info("job(%02d) %s" % (itask.submit_num, msg), itask=itask)
         itask.summary['latest_message'] = msg
         if itask.state.reset_state(TASK_STATUS_SUBMIT_RETRYING):
             self.setup_event_handlers(
                 itask, self.EVENT_SUBMIT_RETRY,
                 "job %s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg))
     self._reset_job_timers(itask)
Exemplo n.º 2
0
 def _report_connection_if_denied(self):
     """Warn about the current client's connection when it is denied.

     Nothing is logged when the connection is not denied.
     """
     # The first item of the client info is not needed for the message.
     client_info = _get_client_info()[1:]
     prog_name, user, host, uuid = client_info
     if not self._get_client_connection_denied():
         return
     template = self.__class__.LOG_CONNECT_DENIED_TMPL
     LOG.warning(template % (user, host, prog_name, uuid))
Exemplo n.º 3
0
 def _process_message_succeeded(self, itask, event_time):
     """Helper for process_message, handle a succeeded message.

     Record the finish time and a zero run status for the job, append the
     elapsed run time to the task definition when a start time is known,
     report outputs that were not completed, reset the task state to
     succeeded and reset the job timers.
     """
     self.pflag = True
     itask.set_summary_time('finished', event_time)
     job_update = {
         "run_status": 0,
         "time_run_exit": event_time,
     }
     self.suite_db_mgr.put_update_task_jobs(itask, job_update)
     # Update mean elapsed time only on task succeeded.
     started_time = itask.summary['started_time']
     if started_time is not None:
         itask.tdef.elapsed_times.append(
             itask.summary['finished_time'] - started_time)
     if not itask.state.outputs.all_completed():
         # These outputs are left out of the report.
         not_reported = (
             TASK_OUTPUT_EXPIRED,
             TASK_OUTPUT_SUBMIT_FAILED,
             TASK_OUTPUT_FAILED,
         )
         missing = "".join(
             "\n  " + output
             for output in itask.state.outputs.get_not_completed()
             if output not in not_reported)
         if missing:
             LOG.info("Succeeded with outputs not completed: %s" % missing,
                      itask=itask)
     if itask.state.reset_state(TASK_STATUS_SUCCEEDED):
         self.setup_event_handlers(itask, "succeeded", "job succeeded")
     self._reset_job_timers(itask)
Exemplo n.º 4
0
 def _job_cmd_out_callback(self, suite, itask, cmd_ctx, line):
     """Callback on job command STDOUT/STDERR.

     Append the line, prefixed with the command's user/host when set, to
     the job activity log of the task. If the log cannot be written, the
     failure and the line itself are logged as warnings instead.
     """
     kwargs = cmd_ctx.cmd_kwargs
     host = kwargs.get("host")
     user = kwargs.get("user")
     if host and user:
         prefix_tmpl = "(%(user)s@%(host)s) "
     elif host:
         prefix_tmpl = "(%(host)s) "
     elif user:
         prefix_tmpl = "(%(user)s@localhost) "
     else:
         prefix_tmpl = None
     if prefix_tmpl is None:
         user_at_host = ""
     else:
         user_at_host = prefix_tmpl % kwargs
     # Lines made of exactly three "|"-separated fields are rewritten as
     # "<first field> <third field>"; any other line is kept as it is.
     fields = line.split("|")
     if len(fields) == 3:
         line = "%s %s" % (fields[0], fields[2])
     job_activity_log = self.task_events_mgr.get_task_job_activity_log(
         suite, itask.point, itask.tdef.name)
     try:
         with open(job_activity_log, "ab") as handle:
             if not line.endswith("\n"):
                 line += "\n"
             handle.write(user_at_host + line)
     except IOError as exc:
         LOG.warning("%s: write failed\n%s" % (job_activity_log, exc))
         LOG.warning(user_at_host + line, itask=itask)
Exemplo n.º 5
0
 def _set_state(self, status):
     """Set, log and record task status (normal change, not forced - don't
     update task_events table).

     Update self.status and self.hold_swap for the requested status, stamp
     self.time_updated, raise flags.iflag and write a debug log line of the
     form "old (old swap) => new (new swap)".

     Args:
         status: the requested new task status.
     """
     # A hold swap equal to the current status is redundant: drop it.
     if self.status == self.hold_swap:
         self.hold_swap = None
     # Same status requested and no hold swap pending: nothing to do.
     if status == self.status and self.hold_swap is None:
         return
     # Keep the previous values for the log message below.
     o_status, o_hold_swap = self.status, self.hold_swap
     if status == TASK_STATUS_HELD:
         # Going to held: remember the status that is being replaced.
         self.hold_swap = self.status
     elif (self.hold_swap == TASK_STATUS_HELD and
             status not in TASK_STATUSES_FINAL):
         # A held swap is pending and the requested status is not final:
         # stash the requested status and set the task to held instead.
         self.hold_swap = status
         status = TASK_STATUS_HELD
     elif self.hold_swap:
         # Any other pending hold swap is discarded.
         self.hold_swap = None
     self.status = status
     self.time_updated = get_current_time_string()
     flags.iflag = True
     # Log
     message = str(o_status)
     if o_hold_swap:
         message += " (%s)" % o_hold_swap
     message += " => %s" % self.status
     if self.hold_swap:
         message += " (%s)" % self.hold_swap
     LOG.debug(message, itask=self.identity)
Exemplo n.º 6
0
 def check_job_time(self, itask, now):
     """Check/handle job timeout and poll timer.

     Args:
         itask: task to check.
         now: current time, compared against itask.timeout.

     Return:
         True if a timeout event has been emitted for the task, otherwise
         the value returned by check_poll_time().
     """
     can_poll = self.check_poll_time(itask, now)
     if itask.timeout is None or now <= itask.timeout:
         return can_poll
     # Timeout reached for task, emit event and reset itask.timeout
     # Start from "no event" so that a task that is neither running nor
     # submitted falls through cleanly instead of hitting unbound locals.
     time_ref = None
     event = None
     if itask.state.status == TASK_STATUS_RUNNING:
         time_ref = itask.summary['started_time']
         event = 'execution timeout'
     elif itask.state.status == TASK_STATUS_SUBMITTED:
         time_ref = itask.summary['submitted_time']
         event = 'submission timeout'
     msg = event
     if event is not None:
         try:
             msg += ' after %s' % intvl_as_str(itask.timeout - time_ref)
         except (TypeError, ValueError):
             # Badness in time_ref?
             pass
     itask.timeout = None  # emit event only once
     if msg and event:
         LOG.warning(msg, itask=itask)
         self.setup_event_handlers(itask, event, msg)
         return True
     else:
         return can_poll
Exemplo n.º 7
0
    def poll_task_jobs(self, suite, itasks, poll_succ=True, msg=None):
        """Poll jobs of specified tasks.

        Tasks whose status is submitted, running or failed are polled.
        Succeeded tasks are polled as well, unless poll_succ is false.
        Tasks in any other status are skipped with a debug message.

        If at least one task is to be polled, msg is logged (when it is not
        None) and the poll command is run with _poll_task_jobs_callback()
        as its callback.
        """
        pollable_statuses = [
            TASK_STATUS_SUBMITTED, TASK_STATUS_RUNNING, TASK_STATUS_FAILED]
        if poll_succ:
            pollable_statuses.append(TASK_STATUS_SUCCEEDED)
        to_poll_tasks = []
        for itask in itasks:
            if itask.state.status not in pollable_statuses:
                LOG.debug("skipping %s: not pollable, "
                          "or skipping 'succeeded' tasks" % itask.identity)
                continue
            to_poll_tasks.append(itask)
        if not to_poll_tasks:
            return
        if msg is not None:
            LOG.info(msg)
        self._run_job_cmd(
            self.JOBS_POLL, suite, to_poll_tasks,
            self._poll_task_jobs_callback)
Exemplo n.º 8
0
    def _process_message_started(self, itask, event_time):
        """Helper for process_message, handle a started message.

        Reset the task state to running, record the start time, work out
        the execution timeout, reset the submission try number, set up the
        'started' event handlers and set the poll time.
        """
        if itask.job_vacated:
            itask.job_vacated = False
            LOG.warning("Vacated job restarted", itask=itask)
        self.pflag = True
        itask.state.reset_state(TASK_STATUS_RUNNING)
        itask.set_summary_time('started', event_time)
        started_time_string = itask.summary['started_time_string']
        self.suite_db_mgr.put_update_task_jobs(
            itask, {"time_run": started_time_string})
        # The execution time limit in the summary, when set, is used in
        # preference to the 'execution timeout' events setting.
        execution_timeout = (
            itask.summary['execution_time_limit'] or
            self._get_events_conf(itask, 'execution timeout'))
        try:
            timeout_time = (
                itask.summary['started_time'] + float(execution_timeout))
        except (TypeError, ValueError):
            # Values that cannot be added up: no execution timeout.
            timeout_time = None
        itask.timeout_timers[TASK_STATUS_RUNNING] = timeout_time

        # submission was successful so reset submission try number
        if TASK_STATUS_SUBMIT_RETRYING in itask.try_timers:
            itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].num = 0
        self.setup_event_handlers(itask, 'started', 'job started')
        self.set_poll_time(itask)
Exemplo n.º 9
0
 def _set_state(self, status):
     """Set, log and record task status (normal change, not forced - don't
     update task_events table).

     Update self.status and self.hold_swap for the requested status, stamp
     self.time_updated, raise flags.iflag and write a debug log line of the
     form "old (old swap) => new (new swap)".

     Args:
         status: the requested new task status.
     """
     # A hold swap equal to the current status is redundant: drop it.
     if self.status == self.hold_swap:
         self.hold_swap = None
     # Same status requested and no hold swap pending: nothing to do.
     if status == self.status and self.hold_swap is None:
         return
     # Keep the previous values for the log message below.
     o_status, o_hold_swap = self.status, self.hold_swap
     if status == TASK_STATUS_HELD:
         # Going to held: remember the status that is being replaced.
         self.hold_swap = self.status
     elif status in TASK_STATUSES_ACTIVE:
         # An active status is applied as requested; if the task is
         # currently held, held becomes the hold swap.
         if self.status == TASK_STATUS_HELD:
             self.hold_swap = TASK_STATUS_HELD
     elif (self.hold_swap == TASK_STATUS_HELD
           and status not in TASK_STATUSES_FINAL):
         # A held swap is pending and the requested status is not final:
         # stash the requested status and set the task to held instead.
         self.hold_swap = status
         status = TASK_STATUS_HELD
     elif self.hold_swap:
         # Any other pending hold swap is discarded.
         self.hold_swap = None
     self.status = status
     self.time_updated = get_current_time_string()
     flags.iflag = True
     # Log
     message = str(o_status)
     if o_hold_swap:
         message += " (%s)" % o_hold_swap
     message += " => %s" % self.status
     if self.hold_swap:
         message += " (%s)" % self.hold_swap
     LOG.debug(message, itask=self.identity)
Exemplo n.º 10
0
    def poll_task_jobs(self, suite, itasks, poll_succ=True, msg=None):
        """Poll jobs of specified tasks.

        A task is polled when its status is submitted, running or failed,
        or when it is succeeded and poll_succ is true. Every other task is
        skipped with a debug message.

        When there is something to poll, msg is logged (if not None) and
        the poll command is run, using _poll_task_jobs_callback() as the
        callback.
        """
        always_pollable = (
            TASK_STATUS_SUBMITTED, TASK_STATUS_RUNNING, TASK_STATUS_FAILED)
        poll_me = []
        for itask in itasks:
            status = itask.state.status
            if status in always_pollable:
                wanted = True
            else:
                wanted = status == TASK_STATUS_SUCCEEDED and poll_succ
            if wanted:
                poll_me.append(itask)
                continue
            LOG.debug("skipping %s: not pollable, "
                      "or skipping 'succeeded' tasks" % itask.identity)
        if not poll_me:
            return
        if msg is not None:
            LOG.info(msg)
        self._run_job_cmd(
            self.JOBS_POLL, suite, poll_me, self._poll_task_jobs_callback)
Exemplo n.º 11
0
 def _process_message_failed(self, itask, event_time, message):
     """Helper for process_message, handle a failed message.

     Record the finish time and a run status of 1 for the job. If a retry
     is lined up, reset the task to retrying; otherwise reset it to failed
     and log the failure as critical. The job timers are reset either way.
     """
     if event_time is None:
         event_time = get_current_time_string()
     itask.set_summary_time('finished', event_time)
     job_update = {
         "run_status": 1,
         "time_run_exit": event_time,
     }
     self.suite_db_mgr.put_update_task_jobs(itask, job_update)
     # The timer is only advanced (next()) when one exists for retrying.
     retry_lined_up = (
         TASK_STATUS_RETRYING in itask.try_timers and
         itask.try_timers[TASK_STATUS_RETRYING].next() is not None)
     if retry_lined_up:
         # There is a retry lined up
         timer = itask.try_timers[TASK_STATUS_RETRYING]
         delay_msg = "retrying in %s" % (timer.delay_timeout_as_str())
         msg = "failed, %s" % (delay_msg)
         LOG.info("job(%02d) %s" % (itask.submit_num, msg), itask=itask)
         itask.summary['latest_message'] = msg
         if itask.state.reset_state(TASK_STATUS_RETRYING):
             self.setup_event_handlers(
                 itask, "retry", "%s, %s" % (self.JOB_FAILED, delay_msg))
     else:
         # No retry lined up: definitive failure.
         self.pflag = True
         if itask.state.reset_state(TASK_STATUS_FAILED):
             self.setup_event_handlers(itask, "failed", message)
         LOG.critical(
             "job(%02d) %s" % (itask.submit_num, "failed"), itask=itask)
     self._reset_job_timers(itask)
Exemplo n.º 12
0
 def _report_connection_if_denied(self):
     """Log a warning if the current client connection is denied."""
     # Drop the first item of the client info; the rest identify the client.
     prog_name, user, host, uuid = _get_client_info()[1:]
     if self._get_client_connection_denied():
         denied_msg = self.__class__.LOG_CONNECT_DENIED_TMPL % (
             user, host, prog_name, uuid)
         LOG.warning(denied_msg)
Exemplo n.º 13
0
    def _execute_stmt(self, stmt, stmt_args_list):
        """Helper for "self.execute_queued_items".

        Connect to the database and execute a statement with each set of
        arguments in stmt_args_list.

        Any sqlite3.Error is re-raised to the caller. For the public
        database the failure is first logged as a warning, listing the
        database file, the statement and its arguments (the traceback is
        printed as well in debug mode).
        """
        try:
            self.connect()
            self.conn.executemany(stmt, stmt_args_list)
        except sqlite3.Error:
            if self.is_public:
                if cylc.flags.debug:
                    traceback.print_exc()
                report = [
                    ("cannot execute database statement:\n"
                     "file=%(file)s:\nstmt=%(stmt)s") % {
                         "file": self.db_file_name,
                         "stmt": stmt
                     }
                ]
                for i, stmt_args in enumerate(stmt_args_list):
                    report.append("\nstmt_args[%(i)d]=%(stmt_args)s" % {
                        "i": i,
                        "stmt_args": stmt_args
                    })
                LOG.warning("".join(report))
            raise
Exemplo n.º 14
0
    def _process_message_started(self, itask, event_time):
        """Helper for process_message, handle a started message.

        Reset the task state to running, record the start event time, set
        the running timeout timer, reset the submission try number, set up
        the 'started' event handlers and set the poll time.
        """
        if itask.job_vacated:
            itask.job_vacated = False
            LOG.warning("Vacated job restarted", itask=itask)
        self.pflag = True
        itask.state.reset_state(TASK_STATUS_RUNNING)
        itask.set_event_time('started', event_time)
        job_update = {"time_run": itask.summary['started_time_string']}
        self.suite_db_mgr.put_update_task_jobs(itask, job_update)
        # Fall back on the 'execution timeout' events setting when the
        # summary holds no execution time limit.
        time_limit = itask.summary['execution_time_limit']
        if time_limit:
            execution_timeout = time_limit
        else:
            execution_timeout = self._get_events_conf(
                itask, 'execution timeout')
        try:
            running_timeout = (
                itask.summary['started_time'] + float(execution_timeout))
        except (TypeError, ValueError):
            # Values that cannot be added up: no execution timeout.
            running_timeout = None
        itask.timeout_timers[TASK_STATUS_RUNNING] = running_timeout

        # submission was successful so reset submission try number
        if TASK_STATUS_SUBMIT_RETRYING in itask.try_timers:
            itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].num = 0
        self.setup_event_handlers(itask, 'started', 'job started')
        self.set_poll_time(itask)
Exemplo n.º 15
0
 def _process_message_submit_failed(self, itask, event_time):
     """Helper for process_message, handle a submit-failed message.

     Record the failed submission (exit time and a submit status of 1) in
     the task jobs table and clear the summary's submit method ID. The task
     is then reset to submit-retrying if a submission retry is lined up,
     or to submit-failed otherwise. Job timers are reset in both cases.

     Args:
         itask: task whose job submission failed.
         event_time: time of the event; defaults to the value of
             get_current_time_string() when None.
     """
     LOG.error(self.EVENT_SUBMIT_FAILED, itask=itask)
     if event_time is None:
         event_time = get_current_time_string()
     # Use the event time for the exit time: taking a new timestamp here
     # would ignore the time passed in by the caller.
     self.suite_db_mgr.put_update_task_jobs(itask, {
         "time_submit_exit": event_time,
         "submit_status": 1,
     })
     itask.summary['submit_method_id'] = None
     if (TASK_STATUS_SUBMIT_RETRYING not in itask.try_timers or
             itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].next() is None):
         # No submission retry lined up: definitive failure.
         self.pflag = True
         # See github #476.
         if itask.state.reset_state(TASK_STATUS_SUBMIT_FAILED):
             self.setup_event_handlers(
                 itask, self.EVENT_SUBMIT_FAILED,
                 'job %s' % self.EVENT_SUBMIT_FAILED)
     else:
         # There is a submission retry lined up.
         timer = itask.try_timers[TASK_STATUS_SUBMIT_RETRYING]
         delay_msg = "submit-retrying in %s" % timer.delay_timeout_as_str()
         msg = "%s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg)
         LOG.info("job(%02d) %s" % (itask.submit_num, msg), itask=itask)
         itask.summary['latest_message'] = msg
         if itask.state.reset_state(TASK_STATUS_SUBMIT_RETRYING):
             self.setup_event_handlers(
                 itask, self.EVENT_SUBMIT_RETRY,
                 "job %s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg))
     self._reset_job_timers(itask)
Exemplo n.º 16
0
    def report(self, request, server_obj):
        """Log client requests with identifying information.

        In debug mode log all requests including task messages. Otherwise log
        all user commands, and just the first info request from each client.

        Calls made from the main thread are not reported at all.
        """
        thread_type = threading.current_thread().__class__.__name__
        if thread_type == '_MainThread':
            # Server methods may be called internally as well as by clients.
            return
        _auth_user, prog_name, user, host, uuid, priv_level = (
            get_client_info())
        name = server_obj.__class__.__name__
        if cylc.flags.debug:
            log_me = True
        elif name in ["SuiteCommandServer",
                      "ExtTriggerServer",
                      "BroadcastServer"]:
            log_me = True
        else:
            # Other servers: only the first request of a new client, and
            # never suite ID requests or task messages.
            log_me = (
                name not in ["SuiteIdServer", "TaskMessageServer"] and
                uuid not in self.clients)
        if log_me:
            cls = self.__class__
            LOG.debug(cls.LOG_CONNECT_ALLOWED_TMPL % (
                user, host, prog_name, priv_level, uuid))
            LOG.info(cls.LOG_COMMAND_TMPL % (
                request, user, host, prog_name, uuid))
        if name == "SuiteIdServer":
            self._num_id_requests += 1
            self.report_id_requests()
        self.clients[uuid] = datetime.datetime.utcnow()
        self._housekeep()
Exemplo n.º 17
0
    def _check_access_priv_and_report(self,
                                      required_privilege_level,
                                      log_info=True):
        """Check access privilege and log requests with identifying info.

        In debug mode log all requests including task messages. Otherwise log
        all user commands, and just the first info command from each client.

        Return:
            dict: containing the client session

        """
        self._check_access_priv(required_privilege_level)
        # The command name is the name of the function calling this method,
        # so this lookup has to stay directly in this method's own frame.
        command = inspect.currentframe().f_back.f_code.co_name
        auth_user, prog_name, user, host, uuid = _get_client_info()
        priv_level = self._get_priv_level(auth_user)
        cls = self.__class__
        LOG.debug(cls.LOG_CONNECT_ALLOWED_TMPL % (
            user, host, prog_name, priv_level, uuid))
        new_client = uuid not in self.clients
        if cylc.flags.debug or (new_client and log_info):
            LOG.info(cls.LOG_COMMAND_TMPL % (
                command, user, host, prog_name, uuid))
        session = self.clients.setdefault(uuid, {})
        session['time'] = time()
        self._housekeep()
        return self.clients[uuid]
Exemplo n.º 18
0
 def _process_message_succeeded(self, itask, event_time):
     """Helper for process_message, handle a succeeded message.

     Store the finish time and a run status of 0, record the elapsed time
     when the start time is known, log any outputs left incomplete, reset
     the task state to succeeded and reset the job timers.
     """
     self.pflag = True
     itask.set_summary_time('finished', event_time)
     self.suite_db_mgr.put_update_task_jobs(
         itask, {"run_status": 0, "time_run_exit": event_time})
     # Update mean elapsed time only on task succeeded.
     summary = itask.summary
     if summary['started_time'] is not None:
         elapsed = summary['finished_time'] - summary['started_time']
         itask.tdef.elapsed_times.append(elapsed)
     outputs = itask.state.outputs
     if not outputs.all_completed():
         # These outputs are left out of the message.
         skipped = [TASK_OUTPUT_EXPIRED,
                    TASK_OUTPUT_SUBMIT_FAILED,
                    TASK_OUTPUT_FAILED]
         lines = [
             "\n  " + output
             for output in outputs.get_not_completed()
             if output not in skipped]
         msg = "".join(lines)
         if msg:
             LOG.info("Succeeded with outputs not completed: %s" % msg,
                      itask=itask)
     if itask.state.reset_state(TASK_STATUS_SUCCEEDED):
         self.setup_event_handlers(itask, "succeeded", "job succeeded")
     self._reset_job_timers(itask)
Exemplo n.º 19
0
 def check_job_time(self, itask, now):
     """Check/handle job timeout and poll timer.

     When the task's timeout has passed, a timeout event ('execution
     timeout' for a running task, 'submission timeout' for a submitted
     one) is logged and handed to the event handlers, and the timeout is
     cleared so that the event is emitted only once.

     Args:
         itask: task to check.
         now: current time, compared against itask.timeout.

     Return:
         True if a timeout event has been emitted for the task, otherwise
         the value returned by check_poll_time().
     """
     can_poll = self.check_poll_time(itask, now)
     if itask.timeout is None or now <= itask.timeout:
         return can_poll
     # Timeout reached for task, emit event and reset itask.timeout
     # Default to "no event": without this, a task in any other status
     # would fail below on unbound local variables.
     event = None
     time_ref = None
     if itask.state.status == TASK_STATUS_RUNNING:
         time_ref = itask.summary['started_time']
         event = 'execution timeout'
     elif itask.state.status == TASK_STATUS_SUBMITTED:
         time_ref = itask.summary['submitted_time']
         event = 'submission timeout'
     msg = event
     if event is not None:
         try:
             msg += ' after %s' % intvl_as_str(itask.timeout - time_ref)
         except (TypeError, ValueError):
             # Badness in time_ref?
             pass
     itask.timeout = None  # emit event only once
     if not (msg and event):
         return can_poll
     LOG.warning(msg, itask=itask)
     self.setup_event_handlers(itask, event, msg)
     return True
Exemplo n.º 20
0
 def _process_message_failed(self, itask, event_time, message):
     """Helper for process_message, handle a failed message.

     Store the finish time and a run status of 1 for the job, then reset
     the task to retrying when a retry is lined up, or to failed (with a
     critical log entry) when it is not. Finally reset the job timers.
     """
     if event_time is None:
         event_time = get_current_time_string()
     itask.set_summary_time('finished', event_time)
     self.suite_db_mgr.put_update_task_jobs(
         itask, {"run_status": 1, "time_run_exit": event_time})
     try_timers = itask.try_timers
     # next() is only called when a retry timer exists.
     if (TASK_STATUS_RETRYING in try_timers and
             try_timers[TASK_STATUS_RETRYING].next() is not None):
         # There is a retry lined up
         delay_msg = "retrying in %s" % (
             try_timers[TASK_STATUS_RETRYING].delay_timeout_as_str())
         msg = "failed, %s" % (delay_msg)
         LOG.info("job(%02d) %s" % (itask.submit_num, msg), itask=itask)
         itask.summary['latest_message'] = msg
         if itask.state.reset_state(TASK_STATUS_RETRYING):
             handler_msg = "%s, %s" % (self.JOB_FAILED, delay_msg)
             self.setup_event_handlers(itask, "retry", handler_msg)
     else:
         # No retry lined up: definitive failure.
         self.pflag = True
         if itask.state.reset_state(TASK_STATUS_FAILED):
             self.setup_event_handlers(itask, "failed", message)
         failure_msg = "job(%02d) %s" % (itask.submit_num, "failed")
         LOG.critical(failure_msg, itask=itask)
     self._reset_job_timers(itask)
Exemplo n.º 21
0
    def _prep_submit_task_job(self, suite, itask, dry_run):
        """Prepare a task job submission.

        Return itask on a good preparation.

        If the task already has a local job file path and this is not a dry
        run, itask is returned straight away. Otherwise the job
        configuration is built and the job file is written. If that fails,
        the traceback is logged, the failure is recorded in the job activity
        log, a submit-failed message is processed (unless this is a dry run)
        and None is returned.

        """
        if itask.local_job_file_path and not dry_run:
            return itask

        try:
            job_conf = self._prep_submit_task_job_impl(suite, itask)
            local_job_file_path = self.task_events_mgr.get_task_job_log(
                suite, itask.point, itask.tdef.name, itask.submit_num,
                self.JOB_FILE_BASE)
            self.job_file_writer.write(local_job_file_path, job_conf)
        except Exception as exc:
            # Could be a bad command template.
            ERR.error(traceback.format_exc())
            LOG.error(traceback.format_exc())
            self.task_events_mgr.log_task_job_activity(
                SuiteProcContext(
                    self.JOBS_SUBMIT,
                    '(prepare job file)', err=exc, ret_code=1),
                suite, itask.point, itask.tdef.name)
            if not dry_run:
                self.task_events_mgr.process_message(
                    itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
            return
        # NOTE(review): nothing follows the try block here, so a successful
        # write also returns None - confirm against the complete method.
Exemplo n.º 22
0
 def _job_cmd_out_callback(self, suite, itask, cmd_ctx, line):
     """Callback on job command STDOUT/STDERR.

     Write the line to the task's job activity log, prefixed with the
     user and/or host of the command when those are set. On a failed
     write, log the failure and the line as warnings instead.
     """
     cmd_kwargs = cmd_ctx.cmd_kwargs
     with_host = bool(cmd_kwargs.get("host"))
     with_user = bool(cmd_kwargs.get("user"))
     owner_at_host = ""
     if with_host and with_user:
         owner_at_host = "(%(user)s@%(host)s) " % cmd_kwargs
     elif with_host:
         owner_at_host = "(%(host)s) " % cmd_kwargs
     elif with_user:
         owner_at_host = "(%(user)s@localhost) " % cmd_kwargs
     # A line of exactly three "|"-separated parts keeps only its first
     # and last part; anything else is left untouched.
     parts = line.split("|")
     if len(parts) == 3:
         timestamp, content = parts[0], parts[2]
         line = "%s %s" % (timestamp, content)
     job_activity_log = self.task_events_mgr.get_task_job_activity_log(
         suite, itask.point, itask.tdef.name)
     try:
         with open(job_activity_log, "ab") as handle:
             if not line.endswith("\n"):
                 line += "\n"
             handle.write(owner_at_host + line)
     except IOError as exc:
         LOG.warning("%s: write failed\n%s" % (job_activity_log, exc))
         LOG.warning(owner_at_host + line, itask=itask)
Exemplo n.º 23
0
 def load_db_broadcast_states(self, row_idx, row):
     """Load broadcast variables from runtime DB broadcast states row.

     The row holds (point, namespace, key, value). A key containing "]"
     is split into its sections and a final key, and the value is stored
     under that path in self.broadcasts[point][namespace].
     """
     if row_idx == 0:
         LOG.info("LOADING broadcast states")
     point, namespace, key, value = row
     if "]" in key:
         sections = self.REC_SECTION.findall(key)
         # The final key is whatever follows the last closing bracket.
         leaf_key = key.rsplit(r"]", 1)[-1]
     else:
         sections = []
         leaf_key = key
     with self.lock:
         target = self.broadcasts.setdefault(point, {})
         target = target.setdefault(namespace, {})
         for section in sections:
             target = target.setdefault(section, {})
         target[leaf_key] = value
     change = {
         "change": CHANGE_PREFIX_SET,
         "point": point,
         "namespace": namespace,
         "key": key,
         "value": value
     }
     LOG.info(CHANGE_FMT.strip() % change)
Exemplo n.º 24
0
    def _prep_submit_task_job(self, suite, itask, dry_run):
        """Prepare a task job submission.

        Return itask on a good preparation.

        A task that already has a local job file path is returned as it is,
        unless this is a dry run. Otherwise the job configuration is built
        and written to the job file. On failure the traceback is logged, the
        failure is added to the job activity log, a submit-failed message is
        processed (unless this is a dry run) and None is returned.

        """
        if itask.local_job_file_path and not dry_run:
            return itask

        try:
            job_conf = self._prep_submit_task_job_impl(suite, itask)
            local_job_file_path = self.task_events_mgr.get_task_job_log(
                suite, itask.point, itask.tdef.name, itask.submit_num,
                self.JOB_FILE_BASE)
            self.job_file_writer.write(local_job_file_path, job_conf)
        except Exception as exc:
            # Could be a bad command template.
            LOG.error(traceback.format_exc())
            self.task_events_mgr.log_task_job_activity(
                SuiteProcContext(
                    self.JOBS_SUBMIT,
                    '(prepare job file)', err=exc, ret_code=1),
                suite, itask.point, itask.tdef.name)
            if not dry_run:
                self.task_events_mgr.process_message(
                    itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED,
                    self.poll_task_jobs)
            return
        # NOTE(review): nothing follows the try block here, so a successful
        # write also returns None - confirm against the complete method.
Exemplo n.º 25
0
    def _remote_init_items(self, host, owner):
        """Return list of items that should be installed on task remote.

        Each item is (path, name),
        where name is relative path under suite run directory.

        The contact file is listed for the 'ssh', 'http' and 'https' task
        communication methods, the passphrase file for 'http' and 'https',
        and the SSL certificate file for 'https' only.
        """
        comm_meth = glbl_cfg().get_host_item(
            'task communication method', host, owner)
        LOG.debug('comm_meth=%s' % comm_meth)
        items = []
        if comm_meth not in ['ssh', 'http', 'https']:
            return items
        srv_mgr = self.suite_srv_files_mgr
        # Contact file
        contact_file = srv_mgr.get_contact_file(self.suite)
        items.append((
            contact_file,
            os.path.join(srv_mgr.DIR_BASE_SRV, srv_mgr.FILE_BASE_CONTACT)))
        # Authentication files, depending on the communication method
        auth_bases = []
        if comm_meth in ['http', 'https']:
            # Passphrase file
            auth_bases.append(srv_mgr.FILE_BASE_PASSPHRASE)
        if comm_meth == 'https':
            # SSL cert file
            auth_bases.append(srv_mgr.FILE_BASE_SSL_CERT)
        for auth_base in auth_bases:
            auth_file = srv_mgr.get_auth_item(auth_base, self.suite)
            items.append((
                auth_file,
                os.path.join(srv_mgr.DIR_BASE_SRV, auth_base)))
        return items
Exemplo n.º 26
0
    def report(self, request, server_obj):
        """Log client requests with identifying information.

        In debug mode log all requests including task messages. Otherwise log
        all user commands, and just the first info request from each client.

        Requests made from the main thread are ignored.
        """
        if threading.current_thread().__class__.__name__ == '_MainThread':
            # Server methods may be called internally as well as by clients.
            return
        client_info = get_client_info()
        _auth_user, prog_name, user, host, uuid, priv_level = client_info
        name = server_obj.__class__.__name__
        always_logged = ["SuiteCommandServer",
                         "ExtTriggerServer",
                         "BroadcastServer"]
        never_first_logged = ["SuiteIdServer", "TaskMessageServer"]
        log_me = cylc.flags.debug or name in always_logged
        if not log_me:
            # Log the first request seen from a client for other servers.
            log_me = (
                name not in never_first_logged and uuid not in self.clients)
        if log_me:
            connect_msg = self.__class__.LOG_CONNECT_ALLOWED_TMPL % (
                user, host, prog_name, priv_level, uuid)
            LOG.debug(connect_msg)
            command_msg = self.__class__.LOG_COMMAND_TMPL % (
                request, user, host, prog_name, uuid)
            LOG.info(command_msg)
        if name == "SuiteIdServer":
            self._num_id_requests += 1
            self.report_id_requests()
        self.clients[uuid] = datetime.datetime.utcnow()
        self._housekeep()
Exemplo n.º 27
0
    def remote_tidy(self):
        """Remove suite contact files from initialised remotes.

        Call "cylc remote-tidy".
        This method is called on suite shutdown, so we want nothing to hang.
        Timeout any incomplete commands after 10 seconds.

        Also remove UUID file on suite host ".service/uuid".

        A command that exits with a non-zero return code is reported as a
        warning, together with its command line, return code and output.
        """
        # Remove UUID file
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite),
            FILE_BASE_UUID)
        try:
            os.unlink(uuid_fname)
        except OSError:
            # Best effort: failure to remove the file is ignored.
            pass
        # Issue all SSH commands in parallel
        procs = {}
        # A single handle on the null device is shared by all commands and
        # closed as soon as they have all been started.
        with open(os.devnull) as devnull:
            for (host, owner), init_with_contact in (
                    self.remote_init_map.items()):
                if init_with_contact != REMOTE_INIT_DONE:
                    continue
                cmd = ['timeout', '10', 'cylc', 'remote-tidy']
                if is_remote_host(host):
                    cmd.append('--host=%s' % host)
                if is_remote_user(owner):
                    cmd.append('--user=%s' % owner)
                if cylc.flags.debug:
                    cmd.append('--debug')
                cmd.append(os.path.join(glbl_cfg().get_derived_host_item(
                    self.suite, 'suite run directory', host, owner)))
                procs[(host, owner)] = (
                    cmd,
                    Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=devnull))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for (host, owner), (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[(host, owner)]
                out, err = proc.communicate()
                if proc.wait():
                    # NOTE(review): assumes Popen is subprocess.Popen, whose
                    # exit status attribute is "returncode" - confirm.
                    LOG.warning(TaskRemoteMgmtError(
                        TaskRemoteMgmtError.MSG_TIDY,
                        (host, owner), ' '.join(quote(item) for item in cmd),
                        proc.returncode, out, err))
        # Terminate any remaining commands
        for (host, owner), (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                # The process may have exited in the meantime.
                pass
            out, err = proc.communicate()
            if proc.wait():
                LOG.warning(TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY,
                    (host, owner), ' '.join(quote(item) for item in cmd),
                    proc.returncode, out, err))
Exemplo n.º 28
0
 def __init__(self, pool_size=None):
     """Create the process pool and an empty results map.

     The pool size is the first truthy value among: the pool_size
     argument, the "process pool size" global configuration item, and the
     CPU count reported by multiprocessing.
     """
     size = pool_size
     if not size:
         size = GLOBAL_CFG.get(["process pool size"])
     if not size:
         # multiprocessing.Pool would default to the CPU count by itself,
         # but it does not expose the value it picked.
         size = multiprocessing.cpu_count()
     self.pool_size = size
     LOG.debug("Initializing process pool, size %d" % self.pool_size)
     self.pool = multiprocessing.Pool(processes=self.pool_size)
     self.results = {}
Exemplo n.º 29
0
    def clear_broadcast(self,
                        point_strings=None,
                        namespaces=None,
                        cancel_settings=None):
        """Clear broadcasts globally, or for listed namespaces and/or points.

        Matching leaf settings are set to None under the lock, then empty
        branches are pruned and the change is recorded and logged.

        Return a tuple (modified_settings, bad_options), where:
        * modified_settings is a list of (point_string, namespace, setting)
          tuples, one per removed broadcast; each setting is a nested dict
          holding the removed value.
        * bad_options is a dict in the form:
              {"point_strings": ["20020202", ..."], ...}
          The dict is only populated if there are options not associated with
          previous broadcasts. The keys can be:
          * point_strings: a list of bad point strings.
          * namespaces: a list of bad namespaces.
          * cancel: a list of tuples. Each tuple contains the keys of a bad
            setting.
        """
        # An empty keys list means "clear everything that matches".
        cancel_keys_list = self._settings_to_keys_list(cancel_settings)

        modified_settings = []
        with self.lock:
            for point_string, by_namespace in self.broadcasts.items():
                if point_strings and point_string not in point_strings:
                    continue
                for namespace, tree in by_namespace.items():
                    if namespaces and namespace not in namespaces:
                        continue
                    # Walk the nested settings with an explicit stack of
                    # (keys leading to node, node) pairs.
                    pending = [([], tree)]
                    while pending:
                        parent_keys, node = pending.pop()
                        for key, value in node.items():
                            key_path = parent_keys + [key]
                            if isinstance(value, dict):
                                pending.append((key_path, value))
                                continue
                            if (cancel_keys_list and
                                    key_path not in cancel_keys_list):
                                continue
                            node[key] = None
                            # Rebuild the nesting around the removed value.
                            setting = {key: value}
                            for outer_key in reversed(parent_keys):
                                setting = {outer_key: setting}
                            modified_settings.append(
                                (point_string, namespace, setting))

        # Prune any empty branches, then work out which options were bad
        pruned = self._prune()
        bad_options = self._get_bad_options(
            pruned, point_strings, namespaces, cancel_keys_list)

        # Record and log the cancellation
        self.suite_db_mgr.put_broadcast(modified_settings, is_cancel=True)
        report = get_broadcast_change_report(
            modified_settings, is_cancel=True)
        LOG.info(report)
        if bad_options:
            LOG.error(get_broadcast_bad_options_report(bad_options))

        return (modified_settings, bad_options)
Exemplo n.º 30
0
 def recover_pub_from_pri(self):
     """Recover public database from private database.

     Does nothing until the public DAO's failed-attempt counter has reached
     its MAX_TRIES; then copies private to public, logs a warning and
     resets the counter.
     """
     if self.pub_dao.n_tries < self.pub_dao.MAX_TRIES:
         return
     self.copy_pri_to_pub()
     names = {
         "pub_db_name": self.pub_dao.db_file_name,
         "pri_db_name": self.pri_dao.db_file_name,
     }
     LOG.warning("%(pub_db_name)s: recovered from %(pri_db_name)s" % names)
     self.pub_dao.n_tries = 0
Exemplo n.º 31
0
 def recover_pub_from_pri(self):
     """Recover public database from private database.

     Only acts once the public DAO's n_tries has reached MAX_TRIES: the
     private database is copied over the public one, a warning is logged
     and n_tries is reset to zero.
     """
     if self.pub_dao.n_tries < self.pub_dao.MAX_TRIES:
         return
     self.copy_pri_to_pub()
     db_names = {
         "pub_db_name": self.pub_dao.db_file_name,
         "pri_db_name": self.pri_dao.db_file_name,
     }
     LOG.warning(
         "%(pub_db_name)s: recovered from %(pri_db_name)s" % db_names)
     self.pub_dao.n_tries = 0
Exemplo n.º 32
0
 def _housekeep(self):
     """Forget clients not seen for more than CLIENT_FORGET_SEC seconds.

     Works on a snapshot of self.clients so entries can be removed while
     looping; an entry that has already disappeared is ignored.
     """
     snapshot = self.clients.copy()
     for client_uuid in snapshot:
         idle_secs = time() - snapshot[client_uuid]['time']
         if not idle_secs > self.CLIENT_FORGET_SEC:
             continue
         # pop with a default: no error if the entry is already gone.
         self.clients.pop(client_uuid, None)
         LOG.debug(self.LOG_FORGET_TMPL % client_uuid)
Exemplo n.º 33
0
 def join(self):
     """Join the pool after its workers have exited.

     The pool must have been closed or terminated first.
     """
     LOG.debug("Joining process pool")
     try:
         self.pool.join()
     except AssertionError:
         # multiprocessing.Pool.join may raise this error; it is ignored
         # so that suite shutdown can continue.
         return
Exemplo n.º 34
0
 def put_command(self, ctx, callback, callback_args=None):
     """Queue a new shell command to execute.

     On success the asynchronous result is stored in self.results, keyed
     by its id(), together with the callback and its arguments. If the
     pool raises AssertionError the command is rejected with a warning.
     """
     try:
         async_result = self.pool.apply_async(_run_command, [ctx])
     except AssertionError as exc:
         details = (str(exc), "Rejecting command (pool closed)", ctx.cmd)
         LOG.warning("%s\n  %s\n %s" % details)
         return
     self.results[id(async_result)] = (
         async_result, callback, callback_args)
Exemplo n.º 35
0
 def _housekeep(self):
     """Forget clients whose last recorded time is too old.

     A client is dropped when the time since its 'time' entry exceeds
     CLIENT_FORGET_SEC. The loop runs over a copy of self.clients, and a
     client that has already been removed is tolerated.
     """
     known_clients = self.clients.copy()
     for client_uuid in known_clients:
         age = time() - known_clients[client_uuid]['time']
         if not age > self.CLIENT_FORGET_SEC:
             continue
         # No error if the entry is already gone.
         self.clients.pop(client_uuid, None)
         LOG.debug(self.LOG_FORGET_TMPL % client_uuid)
Exemplo n.º 36
0
    def _prep_submit_task_job(self, suite, itask, dry_run):
        """Prepare a task job submission.

        Return itask on a good preparation, False if host selection or job
        file preparation failed (the error is passed to
        _prep_submit_task_job_error), or None if remote host selection is
        not ready yet.

        """
        # A job file already exists: nothing to do unless this is a dry run.
        if itask.local_job_file_path and not dry_run:
            return itask

        # Apply broadcast overrides on a copy of the runtime configuration
        overrides = self.task_events_mgr.broadcast_mgr.get_broadcast(
            itask.identity)
        rtconfig = itask.tdef.rtconfig
        if overrides:
            rtconfig = pdeepcopy(rtconfig)
            poverride(rtconfig, overrides, prepend=True)

        # Determine task host settings now, just before job submission,
        # because dynamic host selection may be used.
        try:
            selected_host = self.task_remote_mgr.remote_host_select(
                rtconfig['remote']['host'])
        except TaskRemoteMgmtError as exc:
            # Submit number not yet incremented
            itask.submit_num += 1
            itask.summary['submit_num'] = itask.submit_num
            self._prep_submit_task_job_error(
                suite, itask, dry_run, '(remote host select)', exc)
            return False
        if selected_host is None:
            # host select not ready
            itask.summary['latest_message'] = self.REMOTE_SELECT_MSG
            return None
        itask.task_host = selected_host

        try:
            job_conf = self._prep_submit_task_job_impl(suite, itask, rtconfig)
            job_file_path = self.task_events_mgr.get_task_job_log(
                suite, itask.point, itask.tdef.name, itask.submit_num,
                self.JOB_FILE_BASE)
            self.job_file_writer.write(job_file_path, job_conf)
        except StandardError as exc:
            # Could be a bad command template, IOError, etc
            self._prep_submit_task_job_error(
                suite, itask, dry_run, '(prepare job file)', exc)
            return False
        itask.local_job_file_path = job_file_path

        if dry_run:
            # This will be shown next to submit num in gcylc:
            dry_run_msg = 'job file written (edit/dry-run)'
            itask.summary['latest_message'] = dry_run_msg
            LOG.debug(itask.summary['latest_message'], itask=itask)

        # Return value used by "cylc submit" and "cylc jobscript":
        return itask
Exemplo n.º 37
0
 def execute_queued_items(self):
     """Execute queued items for each table.

     For every table, run the queued DELETE, INSERT and UPDATE statements
     (in that order) through self._execute_stmt, then commit if a
     connection exists.

     On sqlite3.Error:
     * if self.is_public is false, the error is re-raised;
     * otherwise self.n_tries is incremented, a warning is logged, the
       transaction is rolled back where possible, and the queues are left
       untouched.

     On success the queues are cleared, a recovery message is logged if
     there were earlier failed attempts, and self.n_tries is reset to 0.

     self.close() is called on every path.
     """
     try:
         for table in self.tables.values():
             # DELETE statements may have varying number of WHERE args so we
             # can only executemany for each identical template statement.
             for stmt, stmt_args_list in table.delete_queues.items():
                 self._execute_stmt(stmt, stmt_args_list)
             # INSERT statements are uniform for each table, so all INSERT
             # statements can be executed using a single "executemany" call.
             if table.insert_queue:
                 self._execute_stmt(table.get_insert_stmt(),
                                    table.insert_queue)
             # UPDATE statements can have varying number of SET and WHERE
             # args so we can only executemany for each identical template
             # statement.
             for stmt, stmt_args_list in table.update_queues.items():
                 self._execute_stmt(stmt, stmt_args_list)
         # Connection should only be opened if we have executed something.
         # Returning here skips the "else" clause below, but "finally"
         # still runs.
         if self.conn is None:
             return
         self.conn.commit()
     except sqlite3.Error:
         # Only failures on the public database are tolerated.
         if not self.is_public:
             raise
         self.n_tries += 1
         LOG.warning(
             "%(file)s: write attempt (%(attempt)d) did not complete\n" % {
                 "file": self.db_file_name,
                 "attempt": self.n_tries
             })
         if self.conn is not None:
             # Best-effort rollback: a failure here is ignored.
             try:
                 self.conn.rollback()
             except sqlite3.Error:
                 pass
         # The queues are not cleared on this path.
         return
     else:
         # Clear the queues
         for table in self.tables.values():
             table.delete_queues.clear()
             del table.insert_queue[:]  # list.clear avail from Python 3.3
             table.update_queues.clear()
         # Report public database retry recovery if necessary
         if self.n_tries:
             LOG.warning(
                 "%(file)s: recovered after (%(attempt)d) attempt(s)\n" % {
                     "file": self.db_file_name,
                     "attempt": self.n_tries
                 })
         self.n_tries = 0
     finally:
         # Note: This is not strictly necessary. However, if the suite run
         # directory is removed, a forced reconnection to the private
         # database will ensure that the suite dies.
         self.close()
Exemplo n.º 38
0
    def _housekeep(self):
        """Forget inactive clients.

        A client is forgotten when the time since its last recorded
        connection (a datetime stored in self.clients) exceeds
        CLIENT_FORGET_SEC seconds.
        """
        # Iterate over a copy: deleting from a dict while iterating over its
        # own keys view raises RuntimeError on Python 3.
        for uuid, dtime in self.clients.copy().items():
            idle_secs = self._total_seconds(
                datetime.datetime.utcnow() - dtime)
            if idle_secs > self.__class__.CLIENT_FORGET_SEC:
                try:
                    del self.clients[uuid]
                except KeyError:
                    # Already forgotten, e.g. removed since the copy.
                    pass
                LOG.debug(
                    self.__class__.LOG_FORGET_TMPL % uuid)
Exemplo n.º 39
0
 def _run_event_handlers_callback(self, proc_ctx, abort_on_error=False):
     """Callback on completion of a suite event handler.

     A zero return code is logged at info level. Otherwise the context is
     logged as an error, a failure message is sent to ERR, and
     SuiteEventError is raised if abort_on_error is set.
     """
     if not proc_ctx.ret_code:
         LOG.info(str(proc_ctx))
         return
     failure_msg = '%s EVENT HANDLER FAILED' % proc_ctx.cmd_key[1]
     LOG.error(str(proc_ctx))
     ERR.error(failure_msg)
     if abort_on_error:
         raise SuiteEventError(failure_msg)
Exemplo n.º 40
0
    def _housekeep(self):
        """Forget inactive clients.

        A client is forgotten when the time since its last recorded
        connection (a datetime stored in self.clients) exceeds
        CLIENT_FORGET_SEC seconds.
        """
        # Iterate over a copy: deleting from a dict while iterating over its
        # own keys view raises RuntimeError on Python 3.
        for uuid, dtime in self.clients.copy().items():
            idle_secs = self._total_seconds(
                datetime.datetime.utcnow() - dtime)
            if idle_secs > self.__class__.CLIENT_FORGET_SEC:
                try:
                    del self.clients[uuid]
                except KeyError:
                    # Already forgotten, e.g. removed since the copy.
                    pass
                LOG.debug(
                    self.__class__.LOG_FORGET_TMPL % uuid)
Exemplo n.º 41
0
 def signout(self):
     """Forget the calling client, where possible.

     Return True if the client was known and has been forgotten, False if
     it was not in self.clients.
     """
     client_uuid = _get_client_info()[4]
     try:
         del self.clients[client_uuid]
     except KeyError:
         return False
     LOG.debug(self.LOG_FORGET_TMPL % client_uuid)
     return True
Exemplo n.º 42
0
 def signout(self):
     """Forget the calling client, where possible.

     Return False if the client's UUID is not in self.clients; otherwise
     remove it, log the removal at debug level and return True.
     """
     caller_uuid = _get_client_info()[4]
     try:
         del self.clients[caller_uuid]
     except KeyError:
         return False
     LOG.debug(self.LOG_FORGET_TMPL % caller_uuid)
     return True
Exemplo n.º 43
0
 def satisfy_xclock(self, itask):
     """Attempt to satisfy itask's clock trigger, if it has one.

     Nothing happens if the trigger is already satisfied or wall_clock
     reports false. Otherwise the task state is marked satisfied, the
     trigger signature is appended to self.sat_xclock and the event is
     logged.
     """
     label, sig, ctx, already_satisfied = self._get_xclock(itask)
     if already_satisfied or not wall_clock(
             *ctx.func_args, **ctx.func_kwargs):
         return
     itask.state.xclock = (label, True)
     self.sat_xclock.append(sig)
     LOG.info('clock xtrigger satisfied: %s = %s' % (label, str(ctx)))
Exemplo n.º 44
0
 def __init__(self, pool_size=None):
     """Set up the process pool and an empty results map.

     The size used is the first truthy value among: the pool_size
     argument, the "process pool size" global configuration item, and the
     CPU count reported by multiprocessing.
     """
     chosen_size = pool_size
     if not chosen_size:
         chosen_size = GLOBAL_CFG.get(["process pool size"])
     if not chosen_size:
         # The Pool class defaults to cpu_count anyway, but does not
         # expose the result via its public interface.
         chosen_size = multiprocessing.cpu_count()
     self.pool_size = chosen_size
     LOG.debug("Initializing process pool, size %d" % self.pool_size)
     self.pool = multiprocessing.Pool(processes=self.pool_size)
     self.results = {}
Exemplo n.º 45
0
 def put_command(self, ctx, callback, callback_args=None):
     """Queue a new shell command to execute.

     The asynchronous result is stored in self.results under its id(),
     with the callback and callback arguments. If the pool raises
     AssertionError, a warning is logged and nothing is stored.
     """
     try:
         pending = self.pool.apply_async(_run_command, [ctx])
     except AssertionError as exc:
         parts = (str(exc), "Rejecting command (pool closed)", ctx.cmd)
         LOG.warning("%s\n  %s\n %s" % parts)
         return
     self.results[id(pending)] = (pending, callback, callback_args)
Exemplo n.º 46
0
 def execute_queued_items(self):
     """Execute queued items for each table.

     For every table, run the queued DELETE, INSERT and UPDATE statements
     (in that order) through self._execute_stmt, then commit if a
     connection exists.

     On sqlite3.Error:
     * if self.is_public is false, the error is re-raised;
     * otherwise self.n_tries is incremented, a warning is logged, the
       transaction is rolled back where possible, and the queues are left
       untouched.

     On success the queues are cleared, a recovery message is logged if
     there were earlier failed attempts, and self.n_tries is reset to 0.

     self.close() is called on every path.
     """
     try:
         for table in self.tables.values():
             # DELETE statements may have varying number of WHERE args so we
             # can only executemany for each identical template statement.
             for stmt, stmt_args_list in table.delete_queues.items():
                 self._execute_stmt(stmt, stmt_args_list)
             # INSERT statements are uniform for each table, so all INSERT
             # statements can be executed using a single "executemany" call.
             if table.insert_queue:
                 self._execute_stmt(table.get_insert_stmt(), table.insert_queue)
             # UPDATE statements can have varying number of SET and WHERE
             # args so we can only executemany for each identical template
             # statement.
             for stmt, stmt_args_list in table.update_queues.items():
                 self._execute_stmt(stmt, stmt_args_list)
         # Connection should only be opened if we have executed something.
         # Returning here skips the "else" clause below, but "finally"
         # still runs.
         if self.conn is None:
             return
         self.conn.commit()
     except sqlite3.Error:
         # Only failures on the public database are tolerated.
         if not self.is_public:
             raise
         self.n_tries += 1
         LOG.warning(
             "%(file)s: write attempt (%(attempt)d) did not complete\n"
             % {"file": self.db_file_name, "attempt": self.n_tries}
         )
         if self.conn is not None:
             # Best-effort rollback: a failure here is ignored.
             try:
                 self.conn.rollback()
             except sqlite3.Error:
                 pass
         # The queues are not cleared on this path.
         return
     else:
         # Clear the queues
         for table in self.tables.values():
             table.delete_queues.clear()
             del table.insert_queue[:]  # list.clear avail from Python 3.3
             table.update_queues.clear()
         # Report public database retry recovery if necessary
         if self.n_tries:
             LOG.warning(
                 "%(file)s: recovered after (%(attempt)d) attempt(s)\n"
                 % {"file": self.db_file_name, "attempt": self.n_tries}
             )
         self.n_tries = 0
     finally:
         # Note: This is not strictly necessary. However, if the suite run
         # directory is removed, a forced reconnection to the private
         # database will ensure that the suite dies.
         self.close()
Exemplo n.º 47
0
 def poll_task_jobs(self, suite, itasks, warn_skips=False):
     """Poll jobs of specified tasks.

     Only tasks whose status is in TASK_STATUSES_ACTIVE are polled. If
     warn_skips is set, a warning is logged for each task left out.
     """
     pollable = []
     for candidate in itasks:
         if candidate.state.status not in TASK_STATUSES_ACTIVE:
             if warn_skips:
                 LOG.warning(
                     '%s: skip poll, task not pollable' % candidate.identity)
             continue
         pollable.append(candidate)
     self._run_job_cmd(
         self.JOBS_POLL, suite, pollable, self._poll_task_jobs_callback)
Exemplo n.º 48
0
    def _housekeep(self):
        """Forget clients that have been inactive for too long.

        A client is dropped when the time since its recorded datetime
        exceeds the class-level CLIENT_FORGET_SEC. The loop runs over a
        copy of self.clients, and a client that has already been removed
        is tolerated.
        """
        snapshot = self.clients.copy()
        for client_uuid in snapshot:
            elapsed = datetime.datetime.utcnow() - snapshot[client_uuid]
            if not (self._total_seconds(elapsed) >
                    self.__class__.CLIENT_FORGET_SEC):
                continue
            # No error if the entry is already gone.
            self.clients.pop(client_uuid, None)
            LOG.debug(self.__class__.LOG_FORGET_TMPL % client_uuid)
Exemplo n.º 49
0
 def _run_event_custom_handlers(self, config, ctx):
     """Helper for "run_event_handlers", custom event handlers.

     Each handler is treated as a %-style template filled from the quoted
     event, suite, message and suite meta items. A handler with nothing to
     substitute is called with the classic positional interface.
     """
     # Handlers for the specific event take precedence; otherwise fall back
     # to the general handlers if the event is listed in "handler events".
     handlers = self.get_events_conf(config, '%s handler' % ctx.event)
     if not handlers:
         general_events = self.get_events_conf(
             config, 'handler events', [])
         if ctx.event in general_events:
             handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for index, template in enumerate(handlers):
         cmd_key = (
             '%s-%02d' % (self.SUITE_EVENT_HANDLER, index), ctx.event)
         abort_on_error = self.get_events_conf(
             config, 'abort if %s handler fails' % ctx.event)
         # Handler command may be a string for substitution
         try:
             substitutions = {
                 'event': quote(ctx.event),
                 'suite': quote(ctx.suite),
                 'message': quote(ctx.reason),
             }
             meta = config.cfg['meta']
             if meta:
                 for meta_key, meta_value in meta.items():
                     if meta_key == "URL":
                         substitutions["suite_url"] = quote(meta_value)
                     substitutions[meta_key] = quote(meta_value)
             cmd = template % (substitutions)
         except KeyError as exc:
             message = "%s bad template: %s" % (cmd_key, exc)
             LOG.error(message)
             if abort_on_error:
                 raise SuiteEventError(message)
             continue
         if cmd == template:
             # Nothing substituted, assume classic interface
             classic_args = (template, ctx.event, ctx.suite, ctx.reason)
             cmd = "%s '%s' '%s' '%s'" % classic_args
         proc_ctx = SuiteProcContext(
             cmd_key, cmd, env=dict(os.environ), shell=True)
         if not (abort_on_error or self.proc_pool.is_closed()):
             # Run command using process pool
             self.proc_pool.put_command(
                 proc_ctx, self._run_event_handlers_callback)
             continue
         # Run command in foreground if abort on failure is set or if
         # process pool is closed
         self.proc_pool.run_command(proc_ctx)
         self._run_event_handlers_callback(
             proc_ctx, abort_on_error=abort_on_error)
Exemplo n.º 50
0
    def signout(self, server_obj):
        """Force forget this client (for use by GUI etc.).

        Logs the sign-out, removes the caller from self.clients if it is
        still there, then runs the usual housekeeping.
        """
        caller = server_obj.getLocalStorage().caller
        caller_details = (
            caller.user, caller.host, caller.prog_name, caller.uuid)
        LOG.info(self.__class__.LOG_SIGNOUT_TMPL % caller_details)
        # pop with a default: no error if already forgotten.
        self.clients.pop(caller.uuid, None)
        self._housekeep()
Exemplo n.º 51
0
 def _remote_host_select_callback(self, proc_ctx, cmd_str):
     """Callback when host select command exits.

     Stores in self.remote_host_str_map[cmd_str] either the first line of
     the command output (zero return code and non-empty output) or a
     TaskRemoteMgmtError describing the failure.
     """
     self.ready = True
     succeeded = proc_ctx.ret_code == 0 and proc_ctx.out
     if not succeeded:
         # Bad status
         LOG.error(proc_ctx)
         self.remote_host_str_map[cmd_str] = TaskRemoteMgmtError(
             TaskRemoteMgmtError.MSG_SELECT, (cmd_str, None), cmd_str,
             proc_ctx.ret_code, proc_ctx.out, proc_ctx.err)
         return
     # Good status
     LOG.debug(proc_ctx)
     first_line = proc_ctx.out.splitlines()[0]
     self.remote_host_str_map[cmd_str] = first_line
Exemplo n.º 52
0
 def _remote_host_select_callback(self, proc_ctx, cmd_str):
     """Callback when host select command exits.

     Records the outcome in self.remote_host_str_map[cmd_str]: the first
     output line when the return code is zero and there is output,
     otherwise a TaskRemoteMgmtError built from the process context.
     """
     self.ready = True
     has_host = proc_ctx.ret_code == 0 and proc_ctx.out
     if not has_host:
         # Bad status
         LOG.error(proc_ctx)
         self.remote_host_str_map[cmd_str] = TaskRemoteMgmtError(
             TaskRemoteMgmtError.MSG_SELECT, (cmd_str, None), cmd_str,
             proc_ctx.ret_code, proc_ctx.out, proc_ctx.err)
         return
     # Good status
     LOG.debug(proc_ctx)
     selected = proc_ctx.out.splitlines()[0]
     self.remote_host_str_map[cmd_str] = selected
Exemplo n.º 53
0
    def signout(self, server_obj):
        """Force forget this client (for use by GUI etc.).

        Logs the sign-out, removes the caller from self.clients if it is
        still there, then runs the usual housekeeping.
        """

        caller = server_obj.getLocalStorage().caller
        LOG.info(
            self.__class__.LOG_SIGNOUT_TMPL % (
                caller.user, caller.host, caller.prog_name, caller.uuid))
        try:
            del self.clients[caller.uuid]
        except KeyError:
            # Already forgotten. Only the missing-key case is expected here;
            # anything else (including KeyboardInterrupt) must propagate.
            pass
        self._housekeep()
Exemplo n.º 54
0
 def _run_event_custom_handlers(self, config, ctx):
     """Helper for "run_event_handlers", custom event handlers.

     Each handler is treated as a %-style template filled from the quoted
     event, suite, message and suite meta items. A handler with nothing to
     substitute is called with the classic positional interface.
     """
     # Handlers for the specific event take precedence; otherwise fall back
     # to the general handlers if the event is listed in "handler events".
     handlers = self.get_events_conf(config, '%s handler' % ctx.event)
     if not handlers:
         general_events = self.get_events_conf(
             config, 'handler events', [])
         if ctx.event in general_events:
             handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for index, template in enumerate(handlers):
         cmd_key = (
             '%s-%02d' % (self.SUITE_EVENT_HANDLER, index), ctx.event)
         abort_on_error = self.get_events_conf(
             config, 'abort if %s handler fails' % ctx.event)
         # Handler command may be a string for substitution
         try:
             substitutions = {
                 'event': quote(ctx.event),
                 'suite': quote(ctx.suite),
                 'message': quote(ctx.reason),
             }
             meta = config.cfg['meta']
             if meta:
                 for meta_key, meta_value in meta.items():
                     if meta_key == "URL":
                         substitutions["suite_url"] = quote(meta_value)
                     substitutions[meta_key] = quote(meta_value)
             cmd = template % (substitutions)
         except KeyError as exc:
             message = "%s bad template: %s" % (cmd_key, exc)
             LOG.error(message)
             if abort_on_error:
                 raise SuiteEventError(message)
             continue
         if cmd == template:
             # Nothing substituted, assume classic interface
             classic_args = (template, ctx.event, ctx.suite, ctx.reason)
             cmd = "%s '%s' '%s' '%s'" % classic_args
         proc_ctx = SuiteProcContext(
             cmd_key, cmd, env=dict(os.environ), shell=True)
         if not (abort_on_error or self.proc_pool.closed):
             # Run command using process pool
             self.proc_pool.put_command(
                 proc_ctx, self._run_event_handlers_callback)
             continue
         # Run command in foreground if abort on failure is set or if
         # process pool is closed
         self.proc_pool.run_command(proc_ctx)
         self._run_event_handlers_callback(
             proc_ctx, abort_on_error=abort_on_error)
Exemplo n.º 55
0
    def put_broadcast(self,
                      point_strings=None,
                      namespaces=None,
                      settings=None):
        """Add new broadcast settings (server side interface).

        Each setting is applied to every combination of the given points
        and namespaces. An argument left as None is treated as empty.

        Return a tuple (modified_settings, bad_options) where:
          modified_settings is list of modified settings in the form:
            [("20200202", "foo", {"command scripting": "true"}, ...]
          bad_options is a dict that may hold the keys "point_strings" and
            "namespaces", each mapping to a list of the rejected values
            (listed once each, in order of first appearance).
        """
        modified_settings = []
        bad_point_strings = []
        bad_namespaces = []

        # The defaults are None, which cannot be iterated over.
        point_strings = point_strings or []
        namespaces = namespaces or []
        settings = settings or []

        with self.lock:
            for setting in settings:
                for point_string in point_strings:
                    # Standardise the point and check its validity.
                    bad_point = False
                    try:
                        point_string = standardise_point_string(point_string)
                    except PointParsingError:
                        # '*' fails to parse but is accepted as it is.
                        if point_string != '*':
                            # Report each bad point once, not once per
                            # setting.
                            if point_string not in bad_point_strings:
                                bad_point_strings.append(point_string)
                            bad_point = True
                    if not bad_point and point_string not in self.broadcasts:
                        self.broadcasts[point_string] = {}
                    for namespace in namespaces:
                        if namespace not in self.linearized_ancestors:
                            # Report each bad namespace once only.
                            if namespace not in bad_namespaces:
                                bad_namespaces.append(namespace)
                        elif not bad_point:
                            if namespace not in self.broadcasts[point_string]:
                                self.broadcasts[point_string][namespace] = {}
                            self._addict(
                                self.broadcasts[point_string][namespace],
                                setting)
                            modified_settings.append(
                                (point_string, namespace, setting))

        # Log the broadcast
        self.suite_db_mgr.put_broadcast(modified_settings)
        LOG.info(get_broadcast_change_report(modified_settings))

        bad_options = {}
        if bad_point_strings:
            bad_options["point_strings"] = bad_point_strings
        if bad_namespaces:
            bad_options["namespaces"] = bad_namespaces
        return modified_settings, bad_options
Exemplo n.º 56
0
    def kill_task_jobs(self, suite, itasks):
        """Kill jobs of active tasks, and hold the tasks.

        Tasks whose status is in TASK_STATUSES_ACTIVE are held and passed
        to the kill command; a warning is logged for every other task.

        """
        killable = []
        for candidate in itasks:
            if candidate.state.status not in TASK_STATUSES_ACTIVE:
                LOG.warning(
                    'skipping %s: task not killable' % candidate.identity)
                continue
            candidate.state.set_held()
            killable.append(candidate)
        self._run_job_cmd(
            self.JOBS_KILL, suite, killable, self._kill_task_jobs_callback)
Exemplo n.º 57
0
    def _process_message_started(self, itask, event_time):
        """Helper for process_message, handle a started message.

        Moves the task to the running state, records the start time in the
        task summary and the database, resets the job timers and zeroes
        the submission retry counter.
        """
        if itask.job_vacated:
            # The job had been vacated and has now started again.
            itask.job_vacated = False
            LOG.warning("Vacated job restarted", itask=itask)
        self.pflag = True
        state_changed = itask.state.reset_state(TASK_STATUS_RUNNING)
        if state_changed:
            self.setup_event_handlers(itask, 'started', 'job started')
        itask.set_summary_time('started', event_time)
        self._reset_job_timers(itask)
        started_at = itask.summary['started_time_string']
        self.suite_db_mgr.put_update_task_jobs(
            itask, {"time_run": started_at})

        # submission was successful so reset submission try number
        timers = itask.try_timers
        if TASK_STATUS_SUBMIT_RETRYING in timers:
            timers[TASK_STATUS_SUBMIT_RETRYING].num = 0
Exemplo n.º 58
0
    def check_task_jobs(self, suite, task_pool):
        """Check submission and execution timeout and polling timers.

        Every task in the pool for which the task events manager's
        check_job_time returns true is collected and polled in one go.
        """
        checked_at = time()
        due_tasks = set()
        for candidate in task_pool.get_tasks():
            if not self.task_events_mgr.check_job_time(
                    candidate, checked_at):
                continue
            due_tasks.add(candidate)
            if candidate.poll_timer.delay is None:
                continue
            next_poll = candidate.poll_timer.delay_timeout_as_str()
            LOG.info('poll now, (next in %s)' % (next_poll), itask=candidate)
        if due_tasks:
            self.poll_task_jobs(suite, due_tasks)
Exemplo n.º 59
0
 def report_connection_if_denied(self):
     """Log a warning if the current client connection was denied.

     If the client information cannot be obtained at all, a warning with
     every field set to "unknown" is logged instead.
     """
     try:
         _, prog_name, user, host, uuid, _ = get_client_info()
     except Exception:
         unknown_client = ("unknown",) * 4
         LOG.warning(
             self.__class__.LOG_CONNECT_DENIED_TMPL % unknown_client)
         return
     if not get_client_connection_denied():
         return
     client = (user, host, prog_name, uuid)
     LOG.warning(self.__class__.LOG_CONNECT_DENIED_TMPL % client)
Exemplo n.º 60
0
    def kill_task_jobs(self, suite, itasks, warn_skips=False):
        """Kill jobs of active tasks, and hold the tasks.

        Tasks whose status is in TASK_STATUSES_ACTIVE are held and passed
        to the kill command. If warn_skips is set, a warning is logged for
        every other task.

        """
        killable = []
        for candidate in itasks:
            if candidate.state.status not in TASK_STATUSES_ACTIVE:
                if warn_skips:
                    LOG.warning(
                        '%s: skip kill, task not killable' %
                        candidate.identity)
                continue
            candidate.state.set_held()
            killable.append(candidate)
        self._run_job_cmd(
            self.JOBS_KILL, suite, killable, self._kill_task_jobs_callback)