class SPM_MailMonitor:
    """
    SPM-side mailbox monitor.

    Periodically reads the whole inbox block device with dd and dispatches
    any new, checksum-valid messages to callbacks registered per message
    type.  Replies are written back to the outbox with dd.
    """

    log = logging.getLogger('storage.MailBox.SpmMailMonitor')

    def registerMessageType(self, messageType, callback):
        # Route incoming messages whose type field matches messageType to
        # callback (see _handleRequests).
        self._messageTypes[messageType] = callback

    def unregisterMessageType(self, messageType):
        del self._messageTypes[messageType]

    def __init__(self, poolID, maxHostID, inbox, outbox, monitorInterval=2):
        """
        Note: inbox parameter here should point to the HSM's outbox
        mailbox file, and vice versa.
        """
        self._messageTypes = {}
        # Save arguments
        self._stop = False
        self._stopped = False
        self._poolID = poolID
        tpSize = config.getint('irs', 'thread_pool_size') / 2
        waitTimeout = wait_timeout(monitorInterval)
        maxTasks = config.getint('irs', 'max_tasks')
        self.tp = ThreadPool("mailbox-spm", tpSize, waitTimeout, maxTasks)
        self._inbox = inbox
        if not os.path.exists(self._inbox):
            self.log.error("SPM_MailMonitor create failed - inbox %s does not "
                           "exist" % repr(self._inbox))
            raise RuntimeError("SPM_MailMonitor create failed - inbox %s does "
                               "not exist" % repr(self._inbox))
        self._outbox = outbox
        if not os.path.exists(self._outbox):
            self.log.error("SPM_MailMonitor create failed - outbox %s does "
                           "not exist" % repr(self._outbox))
            raise RuntimeError("SPM_MailMonitor create failed - outbox %s "
                               "does not exist" % repr(self._outbox))
        self._numHosts = int(maxHostID)
        # Total mail size: one fixed-size mailbox per host.
        self._outMailLen = MAILBOX_SIZE * self._numHosts
        self._monitorInterval = monitorInterval
        # TODO: add support for multiple paths (multiple mailboxes)
        self._outgoingMail = self._outMailLen * "\0"
        self._incomingMail = self._outgoingMail
        # dd is used for direct I/O so reads/writes bypass the page cache
        # (the mailboxes live on shared storage).
        self._inCmd = ['dd',
                       'if=' + str(self._inbox),
                       'iflag=direct,fullblock',
                       'count=1'
                       ]
        self._outCmd = ['dd',
                        'of=' + str(self._outbox),
                        'oflag=direct',
                        'iflag=fullblock',
                        'conv=notrunc',
                        'count=1'
                        ]
        self._outLock = threading.Lock()
        self._inLock = threading.Lock()
        # Clear outgoing mail
        self.log.debug("SPM_MailMonitor - clearing outgoing mail, command is: "
                       "%s", self._outCmd)
        cmd = self._outCmd + ['bs=' + str(self._outMailLen)]
        (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail)
        if rc:
            self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, "
                             "dd failed")
        self._thread = concurrent.thread(
            self._run, name="mailbox-spm", log=self.log)
        self._thread.start()
        self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID)

    def wait(self, timeout=None):
        # Wait for the monitor thread to finish; True if it terminated.
        self._thread.join(timeout=timeout)
        return not self._thread.is_alive()

    def stop(self):
        # Request the monitor loop in _run to exit after the current cycle.
        self._stop = True

    def isStopped(self):
        return self._stopped

    def getMaxHostID(self):
        return self._numHosts

    def setMaxHostID(self, newMaxId):
        # Grow or shrink both mail buffers to newMaxId mailboxes.  Both
        # locks are taken (in then out) so neither buffer is used while
        # being resized.
        with self._inLock:
            with self._outLock:
                diff = newMaxId - self._numHosts
                if diff > 0:
                    delta = MAILBOX_SIZE * diff * "\0"
                    self._outgoingMail += delta
                    self._incomingMail += delta
                elif diff < 0:
                    delta = MAILBOX_SIZE * diff
                    self._outgoingMail = self._outgoingMail[:-delta]
                    self._incomingMail = self._incomingMail[:-delta]
                self._numHosts = newMaxId
                self._outMailLen = MAILBOX_SIZE * self._numHosts

    @classmethod
    def validateMailbox(self, mailbox, mailboxIndex):
        """
        Return True if mailbox has a valid checksum, and is not an empty
        mailbox, False otherwise.
        """
        assert len(mailbox) == MAILBOX_SIZE
        data = mailbox[:-CHECKSUM_BYTES]
        checksum = mailbox[-CHECKSUM_BYTES:]
        n = misc.checksum(data, CHECKSUM_BYTES)
        expected = struct.pack('<l', n)  # Assumes CHECKSUM_BYTES equals 4!!!
        if checksum != expected:
            self.log.error(
                "mailbox %s checksum failed, not clearing mailbox, clearing "
                "new mail (data=%r, checksum=%r, expected=%r)",
                mailboxIndex, data, checksum, expected)
            return False
        elif expected == pZeroChecksum:
            return False  # Ignore messages of empty mailbox
        return True

    def _handleRequests(self, newMail):
        """
        Scan newMail for messages that changed since the last read and
        queue them to the registered per-type callbacks.

        Returns True if outgoing mail was modified and must be flushed
        to storage by the caller.
        """
        send = False

        # run through all messages and check if new messages have arrived
        # (since last read)
        for host in range(0, self._numHosts):
            # Check mailbox checksum
            mailboxStart = host * MAILBOX_SIZE
            isMailboxValidated = False

            for i in range(0, MESSAGES_PER_MAILBOX):
                msgId = host * SLOTS_PER_MAILBOX + i
                msgStart = msgId * MESSAGE_SIZE

                # First byte of message is message version.  Check message
                # version, if 0 then message is empty and can be skipped
                if newMail[msgStart] in ['\0', '0']:
                    continue

                # Most mailboxes are probably empty so it costs less to check
                # that all messages start with 0 than to validate the mailbox,
                # therefor this is done after we find a non empty message in
                # mailbox
                if not isMailboxValidated:
                    if not self.validateMailbox(
                            newMail[mailboxStart:mailboxStart + MAILBOX_SIZE],
                            host):
                        # Cleaning invalid mbx in newMail
                        newMail = newMail[:mailboxStart] + EMPTYMAILBOX + \
                            newMail[mailboxStart + MAILBOX_SIZE:]
                        break
                    self.log.debug("SPM_MailMonitor: Mailbox %s validated, "
                                   "checking mail", host)
                    isMailboxValidated = True

                newMsg = newMail[msgStart:msgStart + MESSAGE_SIZE]
                msgOffset = msgId * MESSAGE_SIZE
                if newMsg == CLEAN_MESSAGE:
                    # Host asked to clear its reply slot; mirror CLEAN into
                    # outgoing mail so the reply is wiped on the next flush.
                    # Should probably put a setter on outgoingMail which would
                    # take the lock
                    self._outLock.acquire()
                    try:
                        self._outgoingMail = \
                            self._outgoingMail[0:msgOffset] + CLEAN_MESSAGE + \
                            self._outgoingMail[msgOffset + MESSAGE_SIZE:
                                               self._outMailLen]
                    finally:
                        self._outLock.release()
                    send = True
                    continue

                # Message isn't empty, check if its new
                isMessageNew = False
                for j in range(msgStart, msgStart + MESSAGE_SIZE):
                    if newMail[j] != self._incomingMail[j]:
                        isMessageNew = True
                        break

                # If search exhausted, i.e. message hasn't changed since last
                # read, it can be skipped
                if not isMessageNew:
                    continue

                # We only get here if there is a novel request
                try:
                    # Bytes 1-4 of the message carry the message type.
                    msgType = newMail[msgStart + 1:msgStart + 5]
                    if msgType in self._messageTypes:
                        # Use message class to process request according to
                        # message specific logic
                        id = str(uuid.uuid4())
                        self.log.debug("SPM_MailMonitor: processing request: "
                                       "%s" % repr(newMail[
                                           msgStart:msgStart + MESSAGE_SIZE]))
                        res = self.tp.queueTask(
                            id, runTask, (self._messageTypes[msgType], msgId,
                                          newMail[msgStart:
                                                  msgStart + MESSAGE_SIZE])
                        )
                        if not res:
                            raise Exception()
                    else:
                        self.log.error("SPM_MailMonitor: unknown message type "
                                       "encountered: %s", msgType)
                except RuntimeError as e:
                    self.log.error("SPM_MailMonitor: exception: %s caught "
                                   "while handling message: %s", str(e),
                                   newMail[msgStart:msgStart + MESSAGE_SIZE])
                except:
                    self.log.error("SPM_MailMonitor: exception caught while "
                                   "handling message: %s",
                                   newMail[msgStart:msgStart + MESSAGE_SIZE],
                                   exc_info=True)

        self._incomingMail = newMail
        return send

    def _checkForMail(self):
        """
        Read the whole inbox and, if handling produced replies, flush the
        outgoing mail to the outbox.

        Raises IOError if the inbox cannot be read and RuntimeError on a
        short read.
        """
        # Lock is acquired in order to make sure that neither _numHosts nor
        # incomingMail are changed during checkForMail
        self._inLock.acquire()
        try:
            # self.log.debug("SPM_MailMonitor -_checking for mail")
            cmd = self._inCmd + ['bs=' + str(self._outMailLen)]
            # self.log.debug("SPM_MailMonitor - reading incoming mail, "
            #               "command: " + str(cmd))
            (rc, in_mail, err) = misc.execCmd(cmd, raw=True)
            if rc:
                raise IOError(errno.EIO, "_handleRequests._checkForMail - "
                              "Could not read mailbox: %s" % self._inbox)

            if (len(in_mail) != (self._outMailLen)):
                self.log.error('SPM_MailMonitor: _checkForMail - dd succeeded '
                               'but read %d bytes instead of %d, cannot check '
                               'mail.  Read mail contains: %s', len(in_mail),
                               self._outMailLen, repr(in_mail[:80]))
                raise RuntimeError("_handleRequests._checkForMail - Could not "
                                   "read mailbox")
            # self.log.debug("Parsing inbox content: %s", in_mail)
            if self._handleRequests(in_mail):
                self._outLock.acquire()
                try:
                    cmd = self._outCmd + ['bs=' + str(self._outMailLen)]
                    (rc, out, err) = _mboxExecCmd(cmd,
                                                  data=self._outgoingMail)
                    if rc:
                        self.log.warning("SPM_MailMonitor couldn't write "
                                         "outgoing mail, dd failed")
                finally:
                    self._outLock.release()
        finally:
            self._inLock.release()

    def sendReply(self, msgID, msg):
        """
        Write msg.payload into reply slot msgID and flush only the
        affected mailbox to the outbox (dd seek targets a single mailbox).
        """
        # Lock is acquired in order to make sure that neither _numHosts nor
        # outgoingMail are changed while used
        self._outLock.acquire()
        try:
            msgOffset = msgID * MESSAGE_SIZE
            self._outgoingMail = \
                self._outgoingMail[0:msgOffset] + msg.payload + \
                self._outgoingMail[msgOffset + MESSAGE_SIZE:self._outMailLen]
            mailboxOffset = (msgID / SLOTS_PER_MAILBOX) * MAILBOX_SIZE
            mailbox = self._outgoingMail[mailboxOffset:
                                         mailboxOffset + MAILBOX_SIZE]
            cmd = self._outCmd + ['bs=' + str(MAILBOX_SIZE),
                                  'seek=' + str(mailboxOffset / MAILBOX_SIZE)]
            # self.log.debug("Running command: %s, for message id: %s",
            #               str(cmd), str(msgID))
            (rc, out, err) = _mboxExecCmd(cmd, data=mailbox)
            if rc:
                self.log.error("SPM_MailMonitor: sendReply - couldn't send "
                               "reply, dd failed")
        finally:
            self._outLock.release()

    def _run(self):
        # Monitor loop: poll the inbox every monitorInterval seconds until
        # stop() is called; errors are logged and the loop continues.
        try:
            while not self._stop:
                try:
                    self._checkForMail()
                except:
                    self.log.error("Error checking for mail", exc_info=True)
                time.sleep(self._monitorInterval)
        finally:
            self._stopped = True
            self.tp.joinAll(waitForTasks=False)
            self.log.info("SPM_MailMonitor - Incoming mail monitoring thread "
                          "stopped")
class TaskManager:
    """
    Registry and scheduler for storage tasks.

    Tasks are kept in self._tasks (guarded by self._lock) and executed on
    a shared ThreadPool.
    """

    log = logging.getLogger('storage.taskmanager')

    def __init__(self,
                 tpSize=config.getint('irs', 'thread_pool_size'),
                 waitTimeout=3,
                 maxTasks=config.getint('irs', 'max_tasks')):
        # NOTE: the config.getint defaults are evaluated once, at class
        # definition time, not per instantiation.
        self.tp = ThreadPool("tasks", tpSize, waitTimeout, maxTasks)
        self._tasks = {}
        # Tasks loaded from a dump dir, waiting for recoverDumpedTasks().
        self._unqueuedTasks = []
        self._lock = threading.Lock()

    def queue(self, task):
        return self._queueTask(task, task.commit)

    def queueRecovery(self, task):
        return self._queueTask(task, task.recover)

    def _queueTask(self, task, method):
        """
        Register task and queue method on the thread pool; returns task.id.

        Raises se.AddTaskError if the id is already in use or the pool
        refuses the task (in which case the registration is rolled back).
        """
        with self._lock:
            if task.id in self._tasks:
                raise se.AddTaskError('Task id already in use: {0}'.format(
                    task.id))

            self.log.debug("queuing task: %s", task.id)
            self._tasks[task.id] = task

        # The lock is intentionally released here: tp.queueTask may block,
        # and it must not be called while holding self._lock.
        try:
            if not self.tp.queueTask(task.id, method):
                self.log.error("unable to queue task: %s", task.dumpTask())
                with self._lock:
                    del self._tasks[task.id]
                raise se.AddTaskError()
            self.log.debug("task queued: %s", task.id)
        except Exception:
            self.log.exception('could not queue task %s', task.id)
            raise

        return task.id

    def scheduleJob(self, type, store, task, jobName, func, *args):
        # Configure task for auto-recovery and attach a single job running
        # func(*args).  A None store skips persistence.
        task.setTag(type)
        if store is not None:
            task.setPersistence(store, cleanPolicy=TaskCleanType.manual)
        task.setManager(self)
        task.setRecoveryPolicy("auto")
        task.addJob(Job(jobName, func, *args))
        self.log.debug("scheduled job %s for task %s ", jobName, task.id)

    def _getTask(self, taskID):
        # Look up a registered task; raises se.UnknownTask if absent.
        Task.validateID(taskID)
        t = self._tasks.get(taskID, None)
        if t is None:
            raise se.UnknownTask(taskID)
        return t

    def prepareForShutdown(self, wait=False):
        """
        Prepare to shutdown and stop all threads.
        """
        self.log.debug("Request to stop all threads (wait=%s)", wait)
        self.tp.joinAll(waitForThreads=wait)

    def getTaskStatus(self, taskID):
        """
        Internal return Task status for a given task.
        """
        self.log.debug("Entry. taskID: %s", taskID)
        t = self._getTask(taskID)
        status = t.deprecated_getStatus()
        self.log.debug("Return. Response: %s", status)
        return status

    def getAllTasksStatuses(self, tag=None):
        """
        Return Task status for all tasks by type.
        """
        self.log.debug("Entry.")
        subRes = {}
        with self._lock:
            for taskID, task in self._tasks.items():
                if not tag or tag in task.getTags():
                    try:
                        subRes[taskID] = task.deprecated_getStatus()
                    except se.UnknownTask:
                        # Return statuses for existing tasks only.
                        self.log.warn(
                            "Unknown task %s. "
                            "Maybe task was already cleared.",
                            taskID)
        self.log.debug("Return: %s", subRes)
        return subRes

    def getAllTasks(self):
        """
        Return Tasks for all public tasks.
        """
        self.log.debug("Entry.")
        subRes = {}
        with self._lock:
            for taskID, task in self._tasks.items():
                try:
                    subRes[taskID] = task.getDetails()
                except se.UnknownTask:
                    # Return info for existing tasks only.
                    self.log.warn(
                        "Unknown task %s. Maybe task was already "
                        "cleared.", taskID)
        self.log.debug("Return: %s", subRes)
        return subRes

    def unloadTasks(self, tag=None):
        """
        Remove Tasks from managed tasks list
        """
        self.log.debug("Entry.")
        with self._lock:
            # Iterate over a snapshot since we mutate the dict in the loop.
            for taskID, task in list(self._tasks.items()):
                if not tag or tag in task.getTags():
                    self._tasks.pop(taskID, None)
        self.log.debug("Return")

    def stopTask(self, taskID, force=False):
        """
        Stop a task according to given uuid.
        """
        self.log.debug("Entry. taskID: %s", taskID)
        t = self._getTask(taskID)
        t.stop(force=force)
        self.log.debug("Return.")
        return True

    def revertTask(self, taskID):
        self.log.debug("Entry. taskID: %s", taskID)
        # TODO: Should we stop here implicitly ???
        t = self._getTask(taskID)
        t.rollback()
        self.log.debug("Return.")

    def clearTask(self, taskID):
        """
        Clear a task according to given uuid.
        """
        self.log.debug("Entry. taskID: %s", taskID)
        # TODO: Should we stop here implicitly ???
        t = self._getTask(taskID)
        # Task clean procedure may block so we don't want to do it under tasks
        # dict lock, instead use pop under lock in case the task was already
        # removed by another call.
        t.clean()
        with self._lock:
            self._tasks.pop(taskID, None)
        self.log.debug("Return.")

    def getTaskInfo(self, taskID):
        """
        Return task's data according to given uuid.
        """
        self.log.debug("Entry. taskID: %s", taskID)
        t = self._getTask(taskID)
        info = t.getInfo()
        self.log.debug("Return. Response: %s", info)
        return info

    def getAllTasksInfo(self, tag=None):
        """
        Return Task info for all public tasks.
        i.e - not internal.
        """
        self.log.debug("Entry.")
        subRes = {}
        with self._lock:
            for taskID, task in self._tasks.items():
                if not tag or tag in task.getTags():
                    try:
                        subRes[taskID] = task.getInfo()
                    except se.UnknownTask:
                        # Return info for existing tasks only.
                        self.log.warn(
                            "Unknown task %s. "
                            "Maybe task was already cleared.",
                            taskID)
        self.log.debug("Return. Response: %s", subRes)
        return subRes

    def loadDumpedTasks(self, store):
        """
        Load persisted tasks from store into _unqueuedTasks; corrupt
        entries are logged and skipped.
        """
        if not os.path.exists(store):
            self.log.debug("task dump path %s does not exist.", store)
            return
        # taskID is the root part of each (root.ext) entry in the dump task dir
        tasksIDs = set(os.path.splitext(tid)[0] for tid in os.listdir(store))
        for taskID in tasksIDs:
            self.log.debug("Loading dumped task %s", taskID)
            try:
                t = Task.loadTask(store, taskID)
                t.setPersistence(store,
                                 str(t.persistPolicy), str(t.cleanPolicy))
                self._unqueuedTasks.append(t)
            except Exception:
                self.log.error("taskManager: Skipping directory: %s",
                               taskID, exc_info=True)
                continue

    def recoverDumpedTasks(self):
        # Queue every loaded task for recovery; iterate a copy because the
        # list is mutated inside the loop.
        for task in self._unqueuedTasks[:]:
            self.queueRecovery(task)
            self._unqueuedTasks.remove(task)
class SPM_MailMonitor:
    """
    SPM-side mailbox monitor (Python 3, bytes-based).

    Reads the whole inbox with dd on every monitor interval and dispatches
    new, checksum-valid messages to callbacks registered per message type.
    Between full mail checks, a cheap event block (host 0 mailbox) is
    polled so a new event can trigger an early mail check.
    """

    log = logging.getLogger('storage.mailbox')

    def registerMessageType(self, messageType, callback):
        # Route incoming messages whose type field matches messageType to
        # callback (see _handleRequests).
        self._messageTypes[messageType] = callback

    def unregisterMessageType(self, messageType):
        del self._messageTypes[messageType]

    def __init__(self, poolID, maxHostID, inbox, outbox, monitorInterval=2.0,
                 eventInterval=0.5):
        """
        Note: inbox parameter here should point to the HSM's outbox
        mailbox file, and vice versa.
        """
        self._messageTypes = {}
        # Save arguments
        self._stop = False
        self._stopped = False
        self._poolID = poolID
        tpSize = config.getint('irs', 'thread_pool_size') // 2
        waitTimeout = wait_timeout(monitorInterval)
        maxTasks = config.getint('irs', 'max_tasks')
        self.tp = ThreadPool("mailbox-spm", tpSize, waitTimeout, maxTasks)
        self._inbox = inbox
        if not os.path.exists(self._inbox):
            self.log.error("SPM_MailMonitor create failed - inbox %s does not "
                           "exist" % repr(self._inbox))
            raise RuntimeError("SPM_MailMonitor create failed - inbox %s does "
                               "not exist" % repr(self._inbox))
        self._outbox = outbox
        if not os.path.exists(self._outbox):
            self.log.error("SPM_MailMonitor create failed - outbox %s does "
                           "not exist" % repr(self._outbox))
            raise RuntimeError("SPM_MailMonitor create failed - outbox %s "
                               "does not exist" % repr(self._outbox))
        self._numHosts = int(maxHostID)
        # Total mail size: one fixed-size mailbox per host.
        self._outMailLen = MAILBOX_SIZE * self._numHosts
        self._monitorInterval = monitorInterval
        # Event polling may not be slower than the mail check itself.
        self._eventInterval = min(eventInterval, monitorInterval)
        # TODO: add support for multiple paths (multiple mailboxes)
        self._outgoingMail = self._outMailLen * b"\0"
        self._incomingMail = self._outgoingMail
        # dd with direct I/O so reads/writes bypass the page cache (the
        # mailboxes live on shared storage).
        self._inCmd = [
            'dd',
            'if=' + str(self._inbox),
            'iflag=direct,fullblock',
            'count=1'
        ]
        self._outCmd = [
            'dd',
            'of=' + str(self._outbox),
            'oflag=direct',
            'iflag=fullblock',
            'conv=notrunc',
            'count=1'
        ]
        self._outLock = threading.Lock()
        self._inLock = threading.Lock()

        # The event detected in an empty mailbox.
        self._last_event = uuid.UUID(int=0)

        # Clear outgoing mail
        self.log.debug(
            "SPM_MailMonitor - clearing outgoing mail, command is: "
            "%s", self._outCmd)
        cmd = self._outCmd + ['bs=' + str(self._outMailLen)]
        (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail)
        if rc:
            self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, "
                             "dd failed")

        self._thread = concurrent.thread(self._run, name="mailbox-spm",
                                         log=self.log)
        self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID)

    def start(self):
        self._thread.start()

    def wait(self, timeout=None):
        # Wait for the monitor thread to finish; True if it terminated.
        self._thread.join(timeout=timeout)
        return not self._thread.is_alive()

    def stop(self):
        # Request the monitor loop in _run to exit after the current cycle.
        self._stop = True

    def isStopped(self):
        return self._stopped

    @classmethod
    def validateMailbox(self, mailbox, mailboxIndex):
        """
        Return True if mailbox has a valid checksum, and is not an empty
        mailbox, False otherwise.
        """
        assert len(mailbox) == MAILBOX_SIZE
        data = mailbox[:-CHECKSUM_BYTES]
        csum = mailbox[-CHECKSUM_BYTES:]
        expected = packed_checksum(data)
        if csum != expected:
            # Fixed: the log call referenced the old name "checksum",
            # raising NameError on every checksum mismatch.
            self.log.error(
                "mailbox %s checksum failed, not clearing mailbox, clearing "
                "new mail (data=%r, checksum=%r, expected=%r)",
                mailboxIndex, data, csum, expected)
            return False
        elif expected == pZeroChecksum:
            return False  # Ignore messages of empty mailbox
        return True

    def _handleRequests(self, newMail):
        """
        Scan newMail for messages that changed since the last read and
        queue them to the registered per-type callbacks.

        Returns True if outgoing mail was modified and must be flushed
        to storage by the caller.
        """
        send = False

        # run through all messages and check if new messages have arrived
        # (since last read)
        for host in range(0, self._numHosts):
            # Check mailbox checksum
            mailboxStart = host * MAILBOX_SIZE
            isMailboxValidated = False

            for i in range(0, MESSAGES_PER_MAILBOX):
                msgId = host * SLOTS_PER_MAILBOX + i
                msgStart = msgId * MESSAGE_SIZE

                # First byte of message is message version.
                # A null byte indicates an empty message to be skipped.
                if newMail[msgStart:msgStart + 1] == b"\0":
                    continue

                # Most mailboxes are probably empty so it costs less to check
                # that all messages start with 0 than to validate the mailbox,
                # therefor this is done after we find a non empty message in
                # mailbox
                if not isMailboxValidated:
                    if not self.validateMailbox(
                            newMail[mailboxStart:mailboxStart + MAILBOX_SIZE],
                            host):
                        # Cleaning invalid mbx in newMail
                        newMail = newMail[:mailboxStart] + EMPTYMAILBOX + \
                            newMail[mailboxStart + MAILBOX_SIZE:]
                        break
                    self.log.debug(
                        "SPM_MailMonitor: Mailbox %s validated, "
                        "checking mail", host)
                    isMailboxValidated = True

                newMsg = newMail[msgStart:msgStart + MESSAGE_SIZE]
                msgOffset = msgId * MESSAGE_SIZE
                if newMsg == CLEAN_MESSAGE:
                    # Host asked to clear its reply slot; mirror CLEAN into
                    # outgoing mail so the reply is wiped on the next flush.
                    # Should probably put a setter on outgoingMail which would
                    # take the lock
                    with self._outLock:
                        self._outgoingMail = \
                            self._outgoingMail[0:msgOffset] + CLEAN_MESSAGE + \
                            self._outgoingMail[msgOffset + MESSAGE_SIZE:
                                               self._outMailLen]
                    send = True
                    continue

                # Message isn't empty, check if its new
                isMessageNew = False
                for j in range(msgStart, msgStart + MESSAGE_SIZE):
                    if newMail[j] != self._incomingMail[j]:
                        isMessageNew = True
                        break

                # If search exhausted, i.e. message hasn't changed since last
                # read, it can be skipped
                if not isMessageNew:
                    continue

                # We only get here if there is a novel request
                try:
                    # Bytes 1-4 of the message carry the message type.
                    msgType = newMail[msgStart + 1:msgStart + 5]
                    if msgType in self._messageTypes:
                        # Use message class to process request according to
                        # message specific logic
                        id = str(uuid.uuid4())
                        self.log.debug(
                            "SPM_MailMonitor: processing request: "
                            "%s" % repr(newMail[msgStart:msgStart +
                                                MESSAGE_SIZE]))
                        res = self.tp.queueTask(
                            id, runTask, (self._messageTypes[msgType], msgId,
                                          newMail[msgStart:msgStart +
                                                  MESSAGE_SIZE]))
                        if not res:
                            raise Exception()
                    else:
                        self.log.error(
                            "SPM_MailMonitor: unknown message type "
                            "encountered: %s", msgType)
                except RuntimeError as e:
                    self.log.error(
                        "SPM_MailMonitor: exception: %s caught "
                        "while handling message: %s", str(e),
                        newMail[msgStart:msgStart + MESSAGE_SIZE])
                except:
                    self.log.error(
                        "SPM_MailMonitor: exception caught while "
                        "handling message: %s",
                        newMail[msgStart:msgStart + MESSAGE_SIZE],
                        exc_info=True)

        self._incomingMail = newMail
        return send

    def _checkForMail(self):
        """
        Read the whole inbox and, if handling produced replies, flush the
        outgoing mail to the outbox.

        Raises IOError if the inbox cannot be read and RuntimeError on a
        short read.
        """
        # Lock is acquired in order to make sure that
        # incomingMail is not changed during checkForMail
        with self._inLock:
            # self.log.debug("SPM_MailMonitor -_checking for mail")
            cmd = self._inCmd + ['bs=' + str(self._outMailLen)]
            # self.log.debug("SPM_MailMonitor - reading incoming mail, "
            #               "command: " + str(cmd))
            (rc, in_mail, err) = _mboxExecCmd(cmd, raw=True)
            if rc:
                raise IOError(
                    errno.EIO,
                    "_handleRequests._checkForMail - "
                    "Could not read mailbox: %s" % self._inbox)

            if (len(in_mail) != (self._outMailLen)):
                self.log.error(
                    'SPM_MailMonitor: _checkForMail - dd succeeded '
                    'but read %d bytes instead of %d, cannot check '
                    'mail.  Read mail contains: %s', len(in_mail),
                    self._outMailLen, repr(in_mail[:80]))
                raise RuntimeError("_handleRequests._checkForMail - Could not "
                                   "read mailbox")
            # self.log.debug("Parsing inbox content: %s", in_mail)
            if self._handleRequests(in_mail):
                with self._outLock:
                    cmd = self._outCmd + ['bs=' + str(self._outMailLen)]
                    (rc, out, err) = _mboxExecCmd(cmd,
                                                  data=self._outgoingMail)
                    if rc:
                        self.log.warning("SPM_MailMonitor couldn't write "
                                         "outgoing mail, dd failed")

    def sendReply(self, msgID, msg):
        """
        Write msg.payload into reply slot msgID and flush only the
        affected mailbox to the outbox (dd seek targets a single mailbox).
        """
        # Lock is acquired in order to make sure that
        # outgoingMail is not changed while used
        with self._outLock:
            msgOffset = msgID * MESSAGE_SIZE
            self._outgoingMail = \
                self._outgoingMail[0:msgOffset] + msg.payload + \
                self._outgoingMail[msgOffset + MESSAGE_SIZE:self._outMailLen]
            mailboxOffset = (msgID // SLOTS_PER_MAILBOX) * MAILBOX_SIZE
            mailbox = self._outgoingMail[mailboxOffset:mailboxOffset +
                                         MAILBOX_SIZE]
            cmd = self._outCmd + [
                'bs=' + str(MAILBOX_SIZE),
                'seek=' + str(mailboxOffset // MAILBOX_SIZE)
            ]
            # self.log.debug("Running command: %s, for message id: %s",
            #               str(cmd), str(msgID))
            (rc, out, err) = _mboxExecCmd(cmd, data=mailbox)
            if rc:
                self.log.error("SPM_MailMonitor: sendReply - couldn't send "
                               "reply, dd failed")

    def _run(self):
        # Monitor loop: check mail, then wait for events (or the monitor
        # interval) until stop() is called; errors are logged and the loop
        # continues.
        try:
            while not self._stop:
                try:
                    self._checkForMail()
                except:
                    self.log.error("Error checking for mail", exc_info=True)
                self._wait_for_events()
        finally:
            self._stopped = True
            self.tp.joinAll()
            self.log.info("SPM_MailMonitor - Incoming mail monitoring thread "
                          "stopped")

    # Events.

    def _wait_for_events(self):
        """
        Wait until an event is received in the event block, or the monitor
        interval has passed.

        With the default monitor and event intervals, we expect to check
        event 3 times between mail checks.

        check mail  |---------------------|---------------------|
        check event |      |      |       |      |      |       |

        With this configuration we run 3 event checks per 2 seconds, which
        is expected to consume less than 1% cpu.
        """
        if not config.getboolean("mailbox", "events_enable"):
            time.sleep(self._monitorInterval)
            return

        now = time.monotonic()
        deadline = now + self._monitorInterval

        while now < deadline:
            remaining = deadline - now

            if remaining <= self._eventInterval:
                # The last interval before checking mail.
                time.sleep(remaining)
                return

            time.sleep(self._eventInterval)

            try:
                event = self._read_event()
            except ReadEventError as e:
                self.log.warning("Error reading event block: %s", e)
            else:
                if event != self._last_event:
                    self.log.debug("Received event: %s", event)
                    self._last_event = event
                    return

            now = time.monotonic()

    def _read_event(self):
        """
        Read event from host 0 mailbox.
        """
        # Even if we got a short read, it will be at least 512 bytes, so
        # we don't need iflag=fullblock.
        cmd = [
            constants.EXT_DD,
            'if=' + str(self._inbox),
            'iflag=direct',
            'count=1',
            'bs=' + str(MAILBOX_SIZE),
        ]

        # If read fails, we will retry on the next check. In the worst
        # case we will check the entire mailbox after one monitor
        # interval.
        rc, out, err = _mboxExecCmd(cmd, raw=True)
        if rc != 0:
            raise ReadEventError(err.decode())

        # Should never happen, we will retry on the next check.
        if len(out) < 24:
            raise ReadEventError(f"Short read: {len(out)} < 24")

        # The event UUID lives at bytes 4:20 of the event block.
        return uuid.UUID(bytes=bytes(out[4:20]))
class SPM_MailMonitor:
    """
    SPM-side mailbox monitor (legacy variant).

    Builds the inbox/outbox paths from the pool's master storage domain,
    polls the inbox with dd every monitorInterval seconds and dispatches
    new, checksum-valid messages to callbacks registered per message type.
    """

    log = logging.getLogger('storage.MailBox.SpmMailMonitor')

    def registerMessageType(self, messageType, callback):
        # Route incoming messages whose type field matches messageType to
        # callback (see _handleRequests).
        self._messageTypes[messageType] = callback

    def unregisterMessageType(self, messageType):
        del self._messageTypes[messageType]

    def __init__(self, pool, maxHostID, monitorInterval=2):
        self._messageTypes = {}
        # Save arguments
        self._stop = False
        self._stopped = False
        self._poolID = str(pool.spUUID)
        self._spmStorageDir = pool.storage_repository
        tpSize = config.getint('irs', 'thread_pool_size') / 2
        waitTimeout = 3
        maxTasks = config.getint('irs', 'max_tasks')
        self.tp = ThreadPool("mailbox-spm", tpSize, waitTimeout, maxTasks)
        # *** IMPORTANT NOTE: The SPM's inbox is the HSMs' outbox and vice
        # versa *** #
        self._inbox = os.path.join(self._spmStorageDir, self._poolID,
                                   "mastersd", sd.DOMAIN_META_DATA, "inbox")
        if not os.path.exists(self._inbox):
            self.log.error("SPM_MailMonitor create failed - inbox %s does not "
                           "exist" % repr(self._inbox))
            raise RuntimeError("SPM_MailMonitor create failed - inbox %s does "
                               "not exist" % repr(self._inbox))
        self._outbox = os.path.join(self._spmStorageDir, self._poolID,
                                    "mastersd", sd.DOMAIN_META_DATA, "outbox")
        if not os.path.exists(self._outbox):
            self.log.error("SPM_MailMonitor create failed - outbox %s does "
                           "not exist" % repr(self._outbox))
            raise RuntimeError("SPM_MailMonitor create failed - outbox %s "
                               "does not exist" % repr(self._outbox))
        self._numHosts = int(maxHostID)
        # Total mail size: one fixed-size mailbox per host.
        self._outMailLen = MAILBOX_SIZE * self._numHosts
        self._monitorInterval = monitorInterval
        # TODO: add support for multiple paths (multiple mailboxes)
        self._outgoingMail = self._outMailLen * "\0"
        self._incomingMail = self._outgoingMail
        # dd is used for direct I/O so reads/writes bypass the page cache
        # (the mailboxes live on shared storage).
        self._inCmd = ['dd',
                       'if=' + str(self._inbox),
                       'iflag=direct,fullblock',
                       'count=1'
                       ]
        self._outCmd = ['dd',
                        'of=' + str(self._outbox),
                        'oflag=direct',
                        'iflag=fullblock',
                        'conv=notrunc',
                        'count=1'
                        ]
        self._outLock = threading.Lock()
        self._inLock = threading.Lock()
        # Clear outgoing mail
        self.log.debug("SPM_MailMonitor - clearing outgoing mail, command is: "
                       "%s", self._outCmd)
        cmd = self._outCmd + ['bs=' + str(self._outMailLen)]
        (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail)
        if rc:
            self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, "
                             "dd failed")
        t = concurrent.thread(self.run, name="mailbox-spm",
                              logger=self.log.name)
        t.start()
        self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID)

    def stop(self):
        # Request the monitor loop in run to exit after the current cycle.
        self._stop = True

    def isStopped(self):
        return self._stopped

    def getMaxHostID(self):
        return self._numHosts

    def setMaxHostID(self, newMaxId):
        # Grow or shrink both mail buffers to newMaxId mailboxes.  Both
        # locks are taken (in then out) so neither buffer is used while
        # being resized.
        with self._inLock:
            with self._outLock:
                diff = newMaxId - self._numHosts
                if diff > 0:
                    delta = MAILBOX_SIZE * diff * "\0"
                    self._outgoingMail += delta
                    self._incomingMail += delta
                elif diff < 0:
                    delta = MAILBOX_SIZE * diff
                    self._outgoingMail = self._outgoingMail[:-delta]
                    self._incomingMail = self._incomingMail[:-delta]
                self._numHosts = newMaxId
                self._outMailLen = MAILBOX_SIZE * self._numHosts

    def _validateMailbox(self, mailbox, mailboxIndex):
        # Return True only if the trailing checksum matches the mailbox
        # body and the mailbox is not empty (zero checksum).
        chkStart = MAILBOX_SIZE - CHECKSUM_BYTES
        chk = misc.checksum(mailbox[0:chkStart], CHECKSUM_BYTES)
        pChk = struct.pack('<l', chk)  # Assumes CHECKSUM_BYTES equals 4!!!
        if pChk != mailbox[chkStart:chkStart + CHECKSUM_BYTES]:
            self.log.error("SPM_MailMonitor: mailbox %s checksum failed, not "
                           "clearing mailbox, clearing newMail.",
                           str(mailboxIndex))
            return False
        elif pChk == pZeroChecksum:
            return False  # Ignore messages of empty mailbox
        return True

    def _handleRequests(self, newMail):
        """
        Scan newMail for messages that changed since the last read and
        queue them to the registered per-type callbacks.

        Returns True if outgoing mail was modified and must be flushed
        to storage by the caller.
        """
        send = False

        # run through all messages and check if new messages have arrived
        # (since last read)
        for host in range(0, self._numHosts):
            # Check mailbox checksum
            mailboxStart = host * MAILBOX_SIZE
            isMailboxValidated = False

            for i in range(0, MESSAGES_PER_MAILBOX):
                msgId = host * SLOTS_PER_MAILBOX + i
                msgStart = msgId * MESSAGE_SIZE

                # First byte of message is message version.  Check message
                # version, if 0 then message is empty and can be skipped
                if newMail[msgStart] in ['\0', '0']:
                    continue

                # Most mailboxes are probably empty so it costs less to check
                # that all messages start with 0 than to validate the mailbox,
                # therefor this is done after we find a non empty message in
                # mailbox
                if not isMailboxValidated:
                    if not self._validateMailbox(
                            newMail[mailboxStart:mailboxStart + MAILBOX_SIZE],
                            host):
                        # Cleaning invalid mbx in newMail
                        newMail = newMail[:mailboxStart] + EMPTYMAILBOX + \
                            newMail[mailboxStart + MAILBOX_SIZE:]
                        break
                    self.log.debug("SPM_MailMonitor: Mailbox %s validated, "
                                   "checking mail", host)
                    isMailboxValidated = True

                newMsg = newMail[msgStart:msgStart + MESSAGE_SIZE]
                msgOffset = msgId * MESSAGE_SIZE
                if newMsg == CLEAN_MESSAGE:
                    # Host asked to clear its reply slot; mirror CLEAN into
                    # outgoing mail so the reply is wiped on the next flush.
                    # Should probably put a setter on outgoingMail which would
                    # take the lock
                    self._outLock.acquire()
                    try:
                        self._outgoingMail = \
                            self._outgoingMail[0:msgOffset] + CLEAN_MESSAGE + \
                            self._outgoingMail[msgOffset + MESSAGE_SIZE:
                                               self._outMailLen]
                    finally:
                        self._outLock.release()
                    send = True
                    continue

                # Message isn't empty, check if its new
                isMessageNew = False
                for j in range(msgStart, msgStart + MESSAGE_SIZE):
                    if newMail[j] != self._incomingMail[j]:
                        isMessageNew = True
                        break

                # If search exhausted, i.e. message hasn't changed since last
                # read, it can be skipped
                if not isMessageNew:
                    continue

                # We only get here if there is a novel request
                try:
                    # Bytes 1-4 of the message carry the message type.
                    msgType = newMail[msgStart + 1:msgStart + 5]
                    if msgType in self._messageTypes:
                        # Use message class to process request according to
                        # message specific logic
                        id = str(uuid.uuid4())
                        self.log.debug("SPM_MailMonitor: processing request: "
                                       "%s" % repr(newMail[
                                           msgStart:msgStart + MESSAGE_SIZE]))
                        res = self.tp.queueTask(
                            id, runTask, (self._messageTypes[msgType], msgId,
                                          newMail[msgStart:
                                                  msgStart + MESSAGE_SIZE])
                        )
                        if not res:
                            raise Exception()
                    else:
                        self.log.error("SPM_MailMonitor: unknown message type "
                                       "encountered: %s", msgType)
                except RuntimeError as e:
                    self.log.error("SPM_MailMonitor: exception: %s caught "
                                   "while handling message: %s", str(e),
                                   newMail[msgStart:msgStart + MESSAGE_SIZE])
                except:
                    self.log.error("SPM_MailMonitor: exception caught while "
                                   "handling message: %s",
                                   newMail[msgStart:msgStart + MESSAGE_SIZE],
                                   exc_info=True)

        self._incomingMail = newMail
        return send

    def _checkForMail(self):
        """
        Read the whole inbox and, if handling produced replies, flush the
        outgoing mail to the outbox.

        Raises IOError if the inbox cannot be read and RuntimeError on a
        short read.
        """
        # Lock is acquired in order to make sure that neither _numHosts nor
        # incomingMail are changed during checkForMail
        self._inLock.acquire()
        try:
            # self.log.debug("SPM_MailMonitor -_checking for mail")
            cmd = self._inCmd + ['bs=' + str(self._outMailLen)]
            # self.log.debug("SPM_MailMonitor - reading incoming mail, "
            #               "command: " + str(cmd))
            (rc, in_mail, err) = misc.execCmd(cmd, raw=True)
            if rc:
                raise IOError(errno.EIO, "_handleRequests._checkForMail - "
                              "Could not read mailbox: %s" % self._inbox)

            if (len(in_mail) != (self._outMailLen)):
                self.log.error('SPM_MailMonitor: _checkForMail - dd succeeded '
                               'but read %d bytes instead of %d, cannot check '
                               'mail.  Read mail contains: %s', len(in_mail),
                               self._outMailLen, repr(in_mail[:80]))
                raise RuntimeError("_handleRequests._checkForMail - Could not "
                                   "read mailbox")
            # self.log.debug("Parsing inbox content: %s", in_mail)
            if self._handleRequests(in_mail):
                self._outLock.acquire()
                try:
                    cmd = self._outCmd + ['bs=' + str(self._outMailLen)]
                    (rc, out, err) = _mboxExecCmd(cmd,
                                                  data=self._outgoingMail)
                    if rc:
                        self.log.warning("SPM_MailMonitor couldn't write "
                                         "outgoing mail, dd failed")
                finally:
                    self._outLock.release()
        finally:
            self._inLock.release()

    def sendReply(self, msgID, msg):
        """
        Write msg.payload into reply slot msgID and flush only the
        affected mailbox to the outbox (dd seek targets a single mailbox).
        """
        # Lock is acquired in order to make sure that neither _numHosts nor
        # outgoingMail are changed while used
        self._outLock.acquire()
        try:
            msgOffset = msgID * MESSAGE_SIZE
            self._outgoingMail = \
                self._outgoingMail[0:msgOffset] + msg.payload + \
                self._outgoingMail[msgOffset + MESSAGE_SIZE:self._outMailLen]
            mailboxOffset = (msgID / SLOTS_PER_MAILBOX) * MAILBOX_SIZE
            mailbox = self._outgoingMail[mailboxOffset:
                                         mailboxOffset + MAILBOX_SIZE]
            cmd = self._outCmd + ['bs=' + str(MAILBOX_SIZE),
                                  'seek=' + str(mailboxOffset / MAILBOX_SIZE)]
            # self.log.debug("Running command: %s, for message id: %s",
            #               str(cmd), str(msgID))
            (rc, out, err) = _mboxExecCmd(cmd, data=mailbox)
            if rc:
                self.log.error("SPM_MailMonitor: sendReply - couldn't send "
                               "reply, dd failed")
        finally:
            self._outLock.release()

    def run(self):
        # Monitor loop: poll the inbox every monitorInterval seconds until
        # stop() is called; errors are logged and the loop continues.
        try:
            while not self._stop:
                try:
                    self._checkForMail()
                except:
                    self.log.error("Error checking for mail", exc_info=True)
                time.sleep(self._monitorInterval)
        finally:
            self._stopped = True
            self.tp.joinAll(waitForTasks=False)
            self.log.info("SPM_MailMonitor - Incoming mail monitoring thread "
                          "stopped")
class TaskManager:
    """
    Owns the pool of storage tasks: queues new tasks on a thread pool,
    tracks them by id, and exposes status/info/control operations.
    """

    log = logging.getLogger('storage.TaskManager')

    def __init__(self,
                 tpSize=config.getint('irs', 'thread_pool_size'),
                 waitTimeout=3,
                 maxTasks=config.getint('irs', 'max_tasks')):
        # NOTE(review): the config.getint defaults are evaluated once at
        # class-definition time; later config changes do not affect them.
        self.tp = ThreadPool("tasks", tpSize, waitTimeout, maxTasks)
        self._tasks = {}           # taskID -> Task, the managed tasks
        self._unqueuedTasks = []   # dumped tasks loaded but not yet recovered
        self._insertTaskLock = threading.Lock()

    def queue(self, task):
        """Queue a new task for commit; return its id."""
        return self._queueTask(task, task.commit)

    def queueRecovery(self, task):
        """Queue a previously dumped task for recovery; return its id."""
        return self._queueTask(task, task.recover)

    def _queueTask(self, task, method):
        """
        Register task under its id and hand method to the thread pool.

        Raises se.AddTaskError on a duplicate id or when the pool refuses
        the task; in the latter case the registration is rolled back.
        """
        with self._insertTaskLock:
            if task.id in self._tasks:
                raise se.AddTaskError(
                    'Task id already in use: {0}'.format(task.id))

            self.log.debug("queuing task: %s", task.id)
            self._tasks[task.id] = task

        try:
            if not self.tp.queueTask(task.id, method):
                self.log.error("unable to queue task: %s", task.dumpTask())
                # Roll back the registration so the id can be reused.
                del self._tasks[task.id]
                raise se.AddTaskError()
            self.log.debug("task queued: %s", task.id)
        except Exception:
            self.log.exception('could not queue task %s', task.id)
            raise

        return task.id

    def scheduleJob(self, type, store, task, jobName, func, *args):
        """
        Attach a job to task: tag it with type, optionally persist it in
        store, and register func(*args) as the job to run.
        """
        task.setTag(type)
        if store is not None:
            task.setPersistence(store, cleanPolicy=TaskCleanType.manual)
        task.setManager(self)
        task.setRecoveryPolicy("auto")
        task.addJob(Job(jobName, func, *args))
        self.log.debug("scheduled job %s for task %s ", jobName, task.id)

    def __getTask(self, taskID):
        """Return the task registered under taskID or raise se.UnknownTask."""
        Task.validateID(taskID)
        t = self._tasks.get(taskID, None)
        if t is None:
            raise se.UnknownTask(taskID)
        return t

    def prepareForShutdown(self):
        """
        Prepare to shutdown. Stop all threads and asynchronous tasks
        """
        self.log.debug("Request to stop all tasks")

        # Clear the task queue and terminate all pooled threads.
        # Iterate the task objects: iterating the dict itself yields task-id
        # strings, so the original code never actually called stop().
        for t in self._tasks.values():
            if hasattr(t, 'stop'):
                t.stop()
            self.log.info(str(t))

        self.tp.joinAll(waitForTasks=False, waitForThreads=False)

    def getTaskStatus(self, taskID):
        """
        Internal return Task status for a given task.
        """
        self.log.debug("Entry. taskID: %s", taskID)
        t = self.__getTask(taskID)
        status = t.deprecated_getStatus()
        self.log.debug("Return. Response: %s", status)
        return status

    def getAllTasksStatuses(self, tag=None):
        """
        Return Task status for all tasks by type.
        """
        self.log.debug("Entry.")
        subRes = {}
        for taskID, task in self._tasks.items():
            if not tag or tag in task.getTags():
                try:
                    subRes[taskID] = self.getTaskStatus(taskID)
                except se.UnknownTask:
                    # Return statuses for existing tasks only.
                    self.log.warning("Unknown task %s. "
                                     "Maybe task was already cleared.",
                                     taskID)
        self.log.debug("Return: %s", subRes)
        return subRes

    def getAllTasks(self):
        """
        Return Tasks for all public tasks.
        """
        self.log.debug("Entry.")
        subRes = {}
        for taskID, task in self._tasks.items():
            try:
                subRes[taskID] = task.getDetails()
            except se.UnknownTask:
                # Return info for existing tasks only.
                self.log.warning("Unknown task %s. Maybe task was already "
                                 "cleared.", taskID)
        self.log.debug("Return: %s", subRes)
        return subRes

    def unloadTasks(self, tag=None):
        """
        Remove Tasks from managed tasks list
        """
        self.log.debug("Entry.")
        # Snapshot the items: popping from the dict while iterating it
        # directly raises RuntimeError on Python 3.
        for taskID, task in list(self._tasks.items()):
            if not tag or tag in task.getTags():
                self._tasks.pop(taskID, None)
        self.log.debug("Return")

    def stopTask(self, taskID, force=False):
        """
        Stop a task according to given uuid.
        """
        self.log.debug("Entry. taskID: %s", taskID)
        t = self.__getTask(taskID)
        t.stop(force=force)
        self.log.debug("Return.")
        return True

    def revertTask(self, taskID):
        """Roll back the task registered under taskID."""
        self.log.debug("Entry. taskID: %s", taskID)
        # TODO: Should we stop here implicitly ???
        t = self.__getTask(taskID)
        t.rollback()
        self.log.debug("Return.")

    def clearTask(self, taskID):
        """
        Clear a task according to given uuid.
        """
        self.log.debug("Entry. taskID: %s", taskID)
        # TODO: Should we stop here implicitly ???
        t = self.__getTask(taskID)
        t.clean()
        del self._tasks[taskID]
        self.log.debug("Return.")

    def getTaskInfo(self, taskID):
        """
        Return task's data according to given uuid.
        """
        self.log.debug("Entry. taskID: %s", taskID)
        t = self.__getTask(taskID)
        info = t.getInfo()
        self.log.debug("Return. Response: %s", info)
        return info

    def getAllTasksInfo(self, tag=None):
        """
        Return Task info for all public tasks.
        i.e - not internal.
        """
        self.log.debug("Entry.")
        subRes = {}
        for taskID, task in self._tasks.items():
            if not tag or tag in task.getTags():
                try:
                    subRes[taskID] = self.getTaskInfo(taskID)
                except se.UnknownTask:
                    # Return info for existing tasks only.
                    self.log.warning("Unknown task %s. "
                                     "Maybe task was already cleared.",
                                     taskID)
        self.log.debug("Return. Response: %s", subRes)
        return subRes

    def loadDumpedTasks(self, store):
        """
        Load every task dumped under the store directory into
        _unqueuedTasks, skipping (and logging) entries that fail to load.
        """
        if not os.path.exists(store):
            self.log.debug("task dump path %s does not exist.", store)
            return
        # taskID is the root part of each (root.ext) entry in the dump task
        # dir
        tasksIDs = set(os.path.splitext(tid)[0] for tid in os.listdir(store))
        for taskID in tasksIDs:
            self.log.debug("Loading dumped task %s", taskID)
            try:
                t = Task.loadTask(store, taskID)
                t.setPersistence(store,
                                 str(t.persistPolicy),
                                 str(t.cleanPolicy))
                self._unqueuedTasks.append(t)
            except Exception:
                self.log.error("taskManager: Skipping directory: %s",
                               taskID, exc_info=True)
                continue

    def recoverDumpedTasks(self):
        """Queue every loaded dumped task for recovery."""
        # Iterate a copy since successfully queued tasks are removed from
        # the list as we go.
        for task in self._unqueuedTasks[:]:
            self.queueRecovery(task)
            self._unqueuedTasks.remove(task)