def manager_thread_main(self):
    """Run the manager loop: track build status messages and restart dead workers.

    Each iteration waits up to 60 seconds for a ``(command, argument)``
    tuple on ``self.manager_thread_queue``:

    * ``'quit'``  -- leave the loop immediately (no worker check is done).
    * ``'start'`` -- set ``self.now_building[argument]`` to ``True``.
    * ``'end'``   -- set ``self.now_building[argument]`` to ``False``.
    * anything else is logged as a warning and otherwise ignored.

    After every message (or after the wait times out) finished child
    processes are reaped and ``self.restart`` is called for every entry of
    ``self.workers`` whose ``is_alive()`` returns a false value.
    """
    while True:
        # Only the blocking `get` can raise Queue.Empty, so it is the only
        # statement guarded by the `try`.
        try:
            # wait up to 60 seconds
            command, argument = self.manager_thread_queue.get(True, 60)
        except Queue.Empty:
            # Timed out without a message; fall through to the worker check.
            pass
        else:
            if command == 'quit':
                break
            elif command in ('start', 'end'):
                with self._lock:
                    self.now_building[argument] = (command == 'start')
            else:
                logger.warning("Unknown command to manager thread: %s", command)

        # this one is to remove zombie processes
        multiprocessing.active_children()

        with self._lock:
            # Collect the ids first so that `self.workers` is not being
            # iterated while `restart` runs.
            dead_worker_ids = [
                worker_id
                for worker_id, worker in self.workers.items()
                if not worker.is_alive()
            ]
            # NOTE(review): restarts happen while the lock is held; this
            # assumes `self._lock` is reentrant if `restart` also takes it
            # -- confirm.
            for worker_id in dead_worker_ids:
                self.restart(worker_id)
def main(self):
    """Process main function.

    Closes the database connection, opens a new one from ``self.config``
    and loads the watch identified by ``self.id``. When the watch is found,
    ``self.tick_interval`` is set from the watch's ``interval`` and control
    is handed to ``twillmanager.async.Worker.main``. When it is not found,
    a warning is logged and the method returns without running the worker.
    """
    # to make sure we do not use inherited descriptor
    # from the parent process
    close_db_connection()
    self.connection = get_db_connection(self.config)

    self.watch = Watch.load(self.id, self.connection)
    if not self.watch:
        logger.warning("Failed to start worker for watch (id: %s) - no such watch", self.id)
        return

    logger.info("Starting worker for watch `%s` (id: %s)", self.watch.name, self.id)
    self.tick_interval = self.watch.interval
    # `async` is a reserved keyword from Python 3.7 on, so the dotted form
    # `twillmanager.async` no longer parses there; getattr performs the same
    # attribute lookup on every Python version.
    getattr(twillmanager, "async").Worker.main(self)
def execute(self):
    """Run the watch script once, store the result and send alerts if needed.

    Called by `tick` and when `execute` schedules immediate script execution.

    Steps:

    1. Call ``self.on_start`` when it is set.
    2. Run ``self.execute_script()`` and save the new status and the
       current time on ``self.watch`` via ``update_status``.
    3. Log the status (as a warning unless it equals ``STATUS_OK``).
    4. Call ``self.status_notify`` when the status changed, or when the
       watch has a ``reminder_interval``, the new status is
       ``STATUS_FAILED`` and the last alert is older than that interval
       (or no alert was ever sent). ``last_alert`` is then updated and
       saved.

    Any exception is logged and re-raised to the caller.

    NOTE(review): the original comment said the large try block exists
    "to ensure on_end is called", but no ``on_end`` call is visible in this
    method -- confirm it is handled elsewhere.
    """
    try:
        if self.on_start:
            self.on_start()

        new_status, output = self.execute_script()

        old_status = self.watch.status
        self.watch.status = new_status
        self.watch.time = time.time()
        self.watch.update_status(self.connection)

        if new_status != STATUS_OK:
            log_status = logger.warning
        else:
            log_status = logger.info
        log_status("Status for watch `%s` (id: %s): %s",
                   self.watch.name, self.id, new_status)

        # when was last e-mail alert sent
        if self.watch.last_alert is None:
            time_since_last_alert = None
        else:
            time_since_last_alert = self.watch.time - self.watch.last_alert

        status_has_changed = (old_status != new_status)

        # whether last alert was sent long ago enough to send a failure
        # reminder (normally e-mails are sent only on change, but a
        # reminder is sent if the watch keeps failing)
        reminder_interval = self.watch.reminder_interval
        if reminder_interval:
            last_alert_was_long_ago = (
                time_since_last_alert is None
                or time_since_last_alert > reminder_interval
            )
        else:
            last_alert_was_long_ago = False

        reminder_is_due = last_alert_was_long_ago and new_status == STATUS_FAILED
        if status_has_changed or reminder_is_due:
            logger.info("Sending notification for watch `%s` (id: %s)",
                        self.watch.name, self.id)
            self.status_notify(old_status, new_status, output)
            self.watch.last_alert = time.time()
            self.watch.update_status(self.connection)
    except Exception as e:
        # Log the exception object itself: `e.message` is deprecated, is
        # empty for exceptions built from several arguments and does not
        # exist on Python 3, where reading it would hide the real error.
        logger.error("Worker `%s` (id: %s) failed with exception: %s",
                     self.watch.name, self.id, e)
        raise