class Request(object):
    """
    Internal request object, don't use directly
    """
    _log = logging.getLogger("storage.ResourceManager.Request")
    namespace = property(lambda self: self._namespace)
    name = property(lambda self: self._name)
    full_name = property(lambda self: "%s.%s" % (self._namespace, self._name))
    lockType = property(lambda self: self._lockType)
    syncRoot = property(lambda self: self._syncRoot)

    def __init__(self, namespace, name, lockType, callback):
        self._syncRoot = threading.RLock()
        self._namespace = namespace
        self._name = name
        self._lockType = lockType
        self._isActive = True
        self._isCanceled = False
        self._doneEvent = threading.Event()
        self._callback = callback
        self.reqID = str(uuid4())
        self._log = SimpleLogAdapter(self._log, {
            "ResName": self.full_name,
            "ReqID": self.reqID
        })

    def cancel(self):
        with self._syncRoot:
            if not self._isActive:
                self._log.warning("Tried to cancel a processed request")
                raise RequestAlreadyProcessedError("Cannot cancel a processed "
                                                   "request")

            self._isActive = False
            self._isCanceled = True

            self._log.debug("Canceled request")
            try:
                self._callback(RequestRef(self), None)
            except Exception:
                self._log.warning("Request callback threw an exception",
                                  exc_info=True)
            self._callback = None
            self._doneEvent.set()

    def _status(self):
        with self._syncRoot:
            if self._isCanceled:
                return "canceled"
            if self._doneEvent.isSet():
                return "granted"
            return "waiting"

    def canceled(self):
        return self._isCanceled

    def grant(self):
        with self._syncRoot:
            if not self._isActive:
                self._log.warning("Tried to grant a processed request")
                raise RequestAlreadyProcessedError("Cannot grant a processed "
                                                   "request")

            self._isActive = False
            self._log.debug("Granted request")
            self._doneEvent.set()

    def emit(self, resource):
        try:
            ref = RequestRef(self)
            self._callback(ref, resource)
        except Exception:
            self._log.warning("Request callback threw an exception",
                              exc_info=True)

    def wait(self, timeout=None):
        return self._doneEvent.wait(timeout)

    def granted(self):
        with self._syncRoot:
            return (not self._isCanceled) and self._doneEvent.isSet()

    def __str__(self):
        return "Request for %s - %s: %s" % (self.full_name, self.lockType,
                                            self._status())
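

# Illustrative sketch, not part of the original module: a minimal example of
# how the resource manager machinery is expected to drive a Request.  The
# namespace, resource name and timeout below are placeholders invented for
# the example; only the Request/RequestRef API itself comes from this file.
def _example_request_usage(lockType):
    results = []

    def on_done(req_ref, resource):
        # Invoked by the resource manager with the granted resource (via
        # emit()), or with None when the request is canceled before being
        # granted.
        results.append(resource)

    req = Request("00_example", "volume-uuid", lockType, on_done)
    if not req.wait(timeout=10):
        # Still pending: cancel() marks the request canceled and fires the
        # callback with resource=None.  Canceling or granting an already
        # processed request raises RequestAlreadyProcessedError.
        req.cancel()
    return req.granted()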


class Task:
    # External Task info
    fields = {
        # field_name: type
        "id": str,
        "name": six.text_type,
        "tag": six.text_type,
        "store": six.text_type,
        "recoveryPolicy": TaskRecoveryType,
        "persistPolicy": TaskPersistType,
        "cleanPolicy": TaskCleanType,
        "priority": TaskPriority,
        "state": State,
        "njobs": int,
        "nrecoveries": int,
        "metadataVersion": int
    }

    log = logging.getLogger('storage.TaskManager.Task')

    def __init__(self, id, name="", tag="", recovery=TaskRecoveryType.none,
                 priority=TaskPriority.low):
        """
        id - Unique ID
        name - human readable name
        persist - persistency type: auto-clean/manual-clean/not-persistent
        """
        if not id:
            id = str(uuid.uuid4())
        self.metadataVersion = TASK_METADATA_VERSION
        self.validateID(id)
        self.lock = threading.Lock()
        self.callbackLock = threading.Lock()
        self.id = str(id)
        self.name = name
        self.tag = tag
        self.priority = priority
        self.recoveryPolicy = recovery
        self.persistPolicy = TaskPersistType.none
        self.cleanPolicy = TaskCleanType.auto
        self.store = None
        self.defaultException = None

        self.state = State(State.init)
        self.result = TaskResult(0, "Task is initializing", "")

        self.resOwner = resourceManager.Owner(proxy(self), raiseonfailure=True)
        self.error = se.TaskAborted("Unknown error encountered")
        self.mng = None
        self._abort_lock = threading.Lock()
        self._abort_callbacks = set()
        self._aborting = False
        self._forceAbort = False
        self.ref = 0

        self.recoveries = []
        self.jobs = []
        self.nrecoveries = 0  # just utility count - used by save/load
        self.njobs = 0  # just utility count - used by save/load

        self.log = SimpleLogAdapter(self.log, {"Task": self.id})

    def __del__(self):
        def finalize(log, owner, taskDir):
            log.warn("Task was autocleaned")
            owner.releaseAll()
            if taskDir is not None:
                getProcPool().fileUtils.cleanupdir(taskDir)

        if not self.state.isDone():
            taskDir = None
            if (self.cleanPolicy == TaskCleanType.auto and
                    self.store is not None):
                taskDir = os.path.join(self.store, self.id)
            t = concurrent.thread(
                finalize,
                args=(self.log, self.resOwner, taskDir),
                name="task/" + self.id[:8])
            t.start()

    def _done(self):
        self.resOwner.releaseAll()
        if self.cleanPolicy == TaskCleanType.auto:
            self.clean()

    def __state_preparing(self, fromState):
        pass

    def __state_blocked(self, fromState):
        pass

    def __state_acquiring(self, fromState):
        if self.resOwner.requestsGranted():
            self._updateState(State.queued)

    def __state_queued(self, fromState):
        try:
            self.mng.queue(self)
        except Exception as e:
            self._setError(e)
            self.stop()

    def __state_running(self, fromState):
        self._runJobs()

    def __state_finished(self, fromState):
        self._done()

    def __state_aborting(self, fromState):
        if self.ref > 1:
            return
        self.log.debug("_aborting: recover policy %s", self.recoveryPolicy)
        if self.recoveryPolicy == TaskRecoveryType.auto:
            self._updateState(State.racquiring)
        elif self.recoveryPolicy == TaskRecoveryType.none:
            self._updateState(State.failed)
        else:
            self._updateState(State.waitrecover)

    def __state_waitrecover(self, fromState):
        pass

    def __state_racquiring(self, fromState):
        if self.resOwner.requestsGranted():
            self._updateState(State.recovering)

    def __state_recovering(self, fromState):
        self._recover()

    def __state_raborting(self, fromState):
        if self.ref == 1:
            self._updateState(State.failed)
        else:
            self.log.warn("State was changed to 'raborting' "
                          "when ref was not 1.")

    def __state_recovered(self, fromState):
        self._done()

    def __state_failed(self, fromState):
        self._done()

    def __state_cleaning(self, fromState):
        pass

    def _updateState(self, state, force=False):
        fromState = self.state
        requestedState = state
        if self._aborting:
            if self.state.canAbort():
                state = State.aborting
            elif self.state.canAbortRecovery() and state != State.recovered:
                state = State.raborting
            self._aborting = False

        if requestedState == state:
            self.log.debug("moving from state %s -> state %s",
                           fromState, state)
        else:
            self.log.debug("moving from state %s -> state %s instead of %s",
                           fromState, state, requestedState)

        self.state.moveto(state, force)
        if self.persistPolicy == TaskPersistType.auto:
            try:
                self.persist()
            except Exception:
                self.log.warning("Task._updateState: failed persisting task"
                                 " %s", self.id, exc_info=True)

        fn = getattr(self, "_Task__state_%s" % state)
        fn(fromState)

    def _updateResult(self, code, message, result):
        self.result.result = result
        self.result.code = code
        self.result.message = message

    @classmethod
    def validateID(cls, taskID):
        if not taskID or "." in taskID:
            raise se.InvalidParameterException("taskID", taskID)

    @classmethod
    def _loadMetaFile(cls, filename, obj, fields):
        try:
            for line in getProcPool().readLines(filename):
                # process current line
                line = line.encode('utf8')
                if line.find(KEY_SEPARATOR) < 0:
                    continue
                parts = line.split(KEY_SEPARATOR)
                if len(parts) != 2:
                    cls.log.warning("Task._loadMetaFile: %s - ignoring line"
                                    " '%s'", filename, line)
                    continue

                field = _eq_decode(parts[0].strip())
                value = _eq_decode(parts[1].strip())
                if field not in fields:
                    cls.log.warning("Task._loadMetaFile: %s - ignoring field"
                                    " %s in line '%s'", filename, field, line)
                    continue

                ftype = fields[field]
                setattr(obj, field, ftype(value))
        except Exception:
            cls.log.error("Unexpected error", exc_info=True)
            raise se.TaskMetaDataLoadError(filename)

    @classmethod
    def _dump(cls, obj, fields):
        lines = []
        for field in fields:
            try:
                value = six.text_type(getattr(obj, field))
            except AttributeError:
                cls.log.warning("object %s field %s not found" %
                                (obj, field), exc_info=True)
            else:
                try:
                    field = _eq_encode(field)
                    value = _eq_encode(value)
                except ValueError as e:
                    cls.log.warning("Object %s: Cannot encode field %s or "
                                    "value %s. Skipping field. "
%s", obj, field, value, e) else: lines.append("%s %s %s" % (field, KEY_SEPARATOR, value)) return lines @classmethod def _saveMetaFile(cls, filename, obj, fields): try: getProcPool().writeLines(filename, [l.encode('utf-8') + b"\n" for l in cls._dump(obj, fields)]) except Exception: cls.log.error("Unexpected error", exc_info=True) raise se.TaskMetaDataSaveError(filename) def _loadTaskMetaFile(self, taskDir): taskFile = os.path.join(taskDir, self.id + TASK_EXT) self._loadMetaFile(taskFile, self, Task.fields) def _saveTaskMetaFile(self, taskDir): taskFile = os.path.join(taskDir, self.id + TASK_EXT) self._saveMetaFile(taskFile, self, Task.fields) def _loadJobMetaFile(self, taskDir, n): taskFile = os.path.join(taskDir, self.id + JOB_EXT + NUM_SEP + str(n)) self._loadMetaFile(taskFile, self.jobs[n], Job.fields) def _saveJobMetaFile(self, taskDir, n): taskFile = os.path.join(taskDir, self.id + JOB_EXT + NUM_SEP + str(n)) self._saveMetaFile(taskFile, self.jobs[n], Job.fields) def _loadRecoveryMetaFile(self, taskDir, n): taskFile = os.path.join(taskDir, self.id + RECOVER_EXT + NUM_SEP + str(n)) self._loadMetaFile(taskFile, self.recoveries[n], Recovery.fields) def _saveRecoveryMetaFile(self, taskDir, n): taskFile = os.path.join(taskDir, self.id + RECOVER_EXT + NUM_SEP + str(n)) self._saveMetaFile(taskFile, self.recoveries[n], Recovery.fields) def _loadTaskResultMetaFile(self, taskDir): taskFile = os.path.join(taskDir, self.id + RESULT_EXT) self._loadMetaFile(taskFile, self.result, TaskResult.fields) def _saveTaskResultMetaFile(self, taskDir): taskFile = os.path.join(taskDir, self.id + RESULT_EXT) self._saveMetaFile(taskFile, self.result, TaskResult.fields) def _getResourcesKeyList(self, taskDir): keys = [] for path in getProcPool().glob.glob(os.path.join(taskDir, "*" + RESOURCE_EXT)): filename = os.path.basename(path) keys.append(filename[:filename.rfind(RESOURCE_EXT)]) return keys def _load(self, storPath, ext=""): self.log.debug("%s: load from %s, ext '%s'", self, storPath, ext) if self.state != State.init: raise se.TaskMetaDataLoadError("task %s - can't load self: " "not in init state" % self) taskDir = os.path.join(storPath, str(self.id) + str(ext)) if not getProcPool().os.path.exists(taskDir): raise se.TaskDirError("load: no such task dir '%s'" % taskDir) oldid = self.id self._loadTaskMetaFile(taskDir) if self.id != oldid: raise se.TaskMetaDataLoadError("task %s: loaded file do not match" " id (%s != %s)" % (self, self.id, oldid)) if self.state == State.finished: self._loadTaskResultMetaFile(taskDir) for jn in range(self.njobs): self.jobs.append(Job("load", None)) self._loadJobMetaFile(taskDir, jn) self.jobs[jn].setOwnerTask(self) for rn in range(self.nrecoveries): self.recoveries.append(Recovery("load", "load", "load", "load", "")) self._loadRecoveryMetaFile(taskDir, rn) self.recoveries[rn].setOwnerTask(self) def _save(self, storPath): origTaskDir = os.path.join(storPath, self.id) if not getProcPool().os.path.exists(origTaskDir): raise se.TaskDirError("_save: no such task dir '%s'" % origTaskDir) taskDir = os.path.join(storPath, self.id + TEMP_EXT) self.log.debug("_save: orig %s temp %s", origTaskDir, taskDir) if getProcPool().os.path.exists(taskDir): getProcPool().fileUtils.cleanupdir(taskDir) getProcPool().os.mkdir(taskDir) try: self.njobs = len(self.jobs) self.nrecoveries = len(self.recoveries) self._saveTaskMetaFile(taskDir) if self.state == State.finished: self._saveTaskResultMetaFile(taskDir) for jn in range(self.njobs): self._saveJobMetaFile(taskDir, jn) for rn in 
                self._saveRecoveryMetaFile(taskDir, rn)
        except Exception as e:
            self.log.error("Unexpected error", exc_info=True)
            try:
                getProcPool().fileUtils.cleanupdir(taskDir)
            except:
                self.log.warning("can't remove temp taskdir %s" % taskDir)
            raise se.TaskPersistError("%s persist failed: %s" % (self, e))

        # Make sure backup dir doesn't exist
        getProcPool().fileUtils.cleanupdir(origTaskDir + BACKUP_EXT)
        getProcPool().os.rename(origTaskDir, origTaskDir + BACKUP_EXT)
        getProcPool().os.rename(taskDir, origTaskDir)
        getProcPool().fileUtils.cleanupdir(origTaskDir + BACKUP_EXT)
        getProcPool().fileUtils.fsyncPath(origTaskDir)

    def _clean(self, storPath):
        taskDir = os.path.join(storPath, self.id)
        getProcPool().fileUtils.cleanupdir(taskDir)

    def _recoverDone(self):
        # protect against races with stop/abort
        self.log.debug("Recover Done: state %s", self.state)
        while True:
            try:
                if self.state == State.recovering:
                    self._updateState(State.recovered)
                elif self.state == State.raborting:
                    self._updateState(State.failed)
                return
            except se.TaskStateTransitionError:
                self.log.error("Unexpected error", exc_info=True)

    def _recover(self):
        self.log.debug("_recover")
        if not self.state == State.recovering:
            raise se.TaskStateError("%s: _recover in state %s" %
                                    (self, self.state))
        try:
            while self.state == State.recovering:
                rec = self.popRecovery()
                self.log.debug("running recovery %s", rec)
                if not rec:
                    break
                self._run(rec.run)
        except Exception as e:
            self.log.warning("task %s: recovery failed: %s",
                             self, e, exc_info=True)
            # protect against races with stop/abort
            try:
                if self.state == State.recovering:
                    self._updateState(State.raborting)
            except se.TaskStateTransitionError:
                pass

        self._recoverDone()

    def resourceAcquired(self, namespace, resource, locktype):
        # Callback from resourceManager.Owner. May be called by another
        # thread.
        self._incref()
        try:
            self.callbackLock.acquire()
            try:
                self.log.debug("_resourcesAcquired: %s.%s (%s)",
                               namespace, resource, locktype)
                if self.state == State.preparing:
                    return
                if self.state == State.acquiring:
                    self._updateState(State.acquiring)
                elif self.state == State.racquiring:
                    self._updateState(State.racquiring)
                elif self.state == State.blocked:
                    self._updateState(State.preparing)
                elif (self.state == State.aborting or
                      self.state == State.raborting):
                    self.log.debug("resource %s.%s acquired while in state %s",
                                   namespace, resource, self.state)
                else:
                    raise se.TaskStateError("acquire is not allowed in state"
                                            " %s" % self.state)
            finally:
                self.callbackLock.release()
        finally:
            self._decref()

    def resourceRegistered(self, namespace, resource, locktype):
        self._incref()
        try:
            self.callbackLock.acquire()
            try:
                # Callback from resourceManager.Owner. May be called
                # by another thread.
self.log.debug("_resourcesAcquired: %s.%s (%s)", namespace, resource, locktype) # Protect against races with stop/abort if self.state == State.preparing: self._updateState(State.blocked) finally: self.callbackLock.release() finally: self._decref() def _setError(self, e=se.TaskAborted("Unknown error encountered"), expected=False): if not expected: self.log.exception("Unexpected error") self.error = e def _run(self, fn, *args, **kargs): code = 100 message = "Unknown Error" try: return fn(*args, **kargs) except se.StorageException as e: code = e.code message = str(e) self._setError(e, e.expected) except Exception as e: message = six.text_type(e) self._setError(e) except: self._setError() self.log.debug("Task._run: %s %s %s failed - stopping task", self, args, kargs) self.stop() raise se.TaskAborted(message, code) def _runJobs(self): result = "" code = 100 message = "Unknown Error" i = 0 j = None try: if self.aborting(): raise se.TaskAborted("shutting down") if not self.state == State.running: raise se.TaskStateError("%s: can't run Jobs in state %s" % (self, self.state)) # for now: result is the last job result, jobs are run sequentially for j in self.jobs: if self.aborting(): raise se.TaskAborted("shutting down") self.log.debug("Task.run: running job %s: %s" % (i, j)) self._updateResult( 0, 'running job {0} of {1}'.format(i + 1, len(self.jobs)), '') result = self._run(j.run) if self.aborting(): raise se.TaskAborted("shutting down") if result is None: result = "" i += 1 j = None self._updateResult(0, "%s jobs completed successfully" % i, result) self._updateState(State.finished) self.log.debug('Task.run: exit - success: result %s' % result) return result except se.TaskAborted as e: self.log.debug("aborting: %s", e) message = e.value code = e.abortedcode if not self.aborting(): self.log.error("Aborted exception but not in aborting state") raise self._updateResult(code, message, "") def _doAbort(self, force=False): self.log.debug("Task._doAbort: force %s" % force) self.lock.acquire() # Am I really the last? if self.ref != 0: self.lock.release() return self.ref += 1 self.lock.release() try: try: if (not self.state.canAbort() and (force and not self.state.canAbortRecovery())): self.log.warning("Task._doAbort %s: ignoring - " "at state %s", self, self.state) return self.resOwner.cancelAll() if self.state.canAbort(): self._updateState(State.aborting) else: self._updateState(State.raborting) except se.TaskAborted: self._updateState(State.failed) finally: self.lock.acquire() self.ref -= 1 self.lock.release() # If something horrible went wrong. Just fail the task. if not self.state.isDone(): self.log.warn("Task exited in non terminal state. " "Setting tasks as failed.") self._updateState(State.failed) def _doRecover(self): self.lock.acquire() # Am I really the last? 
        if self.ref != 0:
            self.lock.release()
            raise se.TaskHasRefs(six.text_type(self))

        self.ref += 1
        self.lock.release()
        try:
            self._updateState(State.racquiring)
        finally:
            self.lock.acquire()
            self.ref -= 1
            self.lock.release()

    def _incref(self, force=False):
        self.lock.acquire()
        try:
            if self.aborting() and (self._forceAbort or not force):
                raise se.TaskAborted(six.text_type(self))

            self.ref += 1
            ref = self.ref
            return ref
        finally:
            self.lock.release()

    def _decref(self, force=False):
        self.lock.acquire()
        self.ref -= 1
        ref = self.ref
        self.lock.release()

        self.log.debug("ref %d aborting %s", ref, self.aborting())
        if ref == 0 and self.aborting():
            self._doAbort(force)
        return ref

    ##########################################################################
    # Public Interface                                                       #
    ##########################################################################

    def setDefaultException(self, exceptionObj):
        # defaultException must have response method
        if exceptionObj and not hasattr(exceptionObj, "response"):
            raise se.InvalidDefaultExceptionException(
                six.text_type(exceptionObj))
        self.defaultException = exceptionObj

    def setTag(self, tag):
        if KEY_SEPARATOR in tag:
            raise ValueError("tag cannot include %s character" %
                             KEY_SEPARATOR)
        self.tag = six.text_type(tag)

    def isDone(self):
        return self.state.isDone()

    def addJob(self, job):
        """
        Add async job to the task. Assumes all resources are acquired
        or registered.
        """
        if not self.mng:
            raise se.UnmanagedTask(six.text_type(self))
        if not isinstance(job, Job):
            raise TypeError("Job param %s(%s) must be Job object" %
                            (repr(job), type(job)))
        if self.state != State.preparing:
            raise Exception("Task.addJob: can't add job in non preparing state"
                            " (%s)" % self.state)
        if not job.name:
            raise ValueError("Task.addJob: name is required")
        name = job.name
        for j in self.jobs:
            if name == j.name:
                raise ValueError("addJob: name '%s' must be unique" % (name))
        job.setOwnerTask(self)
        self.jobs.append(job)
        self.njobs = len(self.jobs)

    def clean(self):
        if not self.store:
            return
        if not self.isDone():
            raise se.TaskStateError("can't clean in state %s" % self.state)
        self._clean(self.store)

    def pushRecovery(self, recovery):
        """
        Add recovery "job" to the task.
        Recoveries are committed in FILO order.
        Assumes that all required resources are acquired or registered.
""" if not isinstance(recovery, Recovery): raise TypeError("recovery param %s(%s) must be Recovery object" % (repr(recovery), type(recovery))) if not recovery.name: raise ValueError("pushRecovery: name is required") name = recovery.name for r in self.recoveries: if name == r.name: raise ValueError("pushRecovery: name '%s' must be unique" % (name)) recovery.setOwnerTask(self) self.recoveries.append(recovery) self.persist() def replaceRecoveries(self, recovery): if not isinstance(recovery, Recovery): raise TypeError("recovery param %s(%s) must be Recovery object" % (repr(recovery), type(recovery))) if not recovery.name: raise ValueError("replaceRecoveries: name is required") recovery.setOwnerTask(self) rec = Recovery('stubName', 'stubMod', 'stubObj', 'stubFunc', []) while (rec and (rec.name != ROLLBACK_SENTINEL)): rec = self.popRecovery() self.recoveries.append(recovery) self.persist() def popRecovery(self): if self.recoveries: return self.recoveries.pop() def clearRecoveries(self): self.recoveries = [] self.persist() def setManager(self, manager): # If need be, refactor out to "validateManager" method if not hasattr(manager, "queue"): raise se.InvalidTaskMng(six.text_type(manager)) self.mng = manager def setCleanPolicy(self, clean): self.cleanPolicy = TaskCleanType(clean) def setPersistence(self, store, persistPolicy=TaskPersistType.auto, cleanPolicy=TaskCleanType.auto): self.persistPolicy = TaskPersistType(persistPolicy) self.store = store self.setCleanPolicy(cleanPolicy) if self.persistPolicy != TaskPersistType.none and not self.store: raise se.TaskPersistError("no store defined") taskDir = os.path.join(self.store, self.id) try: getProcPool().fileUtils.createdir(taskDir) except Exception as e: self.log.error("Unexpected error", exc_info=True) raise se.TaskPersistError("%s: cannot access/create taskdir" " %s: %s" % (self, taskDir, e)) if (self.persistPolicy == TaskPersistType.auto and self.state != State.init): self.persist() def setRecoveryPolicy(self, clean): self.recoveryPolicy = TaskRecoveryType(clean) def rollback(self): self.log.debug('(rollback): enter') if self.recoveryPolicy == TaskRecoveryType.none: self.log.debug("rollback is skipped") return if not self.isDone(): raise se.TaskNotFinished("can't rollback in state %s" % self.state) self._doRecover() self.log.debug('(rollback): exit') def persist(self): if self.persistPolicy == TaskPersistType.none: return if not self.store: raise se.TaskPersistError("no store defined") if self.state == State.init: raise se.TaskStateError("can't persist in state %s" % self.state) self._save(self.store) @classmethod def loadTask(cls, store, taskid): t = Task(taskid) if getProcPool().os.path.exists(os.path.join(store, taskid)): ext = "" # TBD: is this the correct order (temp < backup) + should temp # be considered at all? 
        elif getProcPool().os.path.exists(os.path.join(store,
                                                       taskid + TEMP_EXT)):
            ext = TEMP_EXT
        elif getProcPool().os.path.exists(os.path.join(store,
                                                       taskid + BACKUP_EXT)):
            ext = BACKUP_EXT
        else:
            raise se.TaskDirError("loadTask: no such task dir '%s/%s'" %
                                  (store, taskid))
        t._load(store, ext)
        return t

    @threadlocal_task
    def prepare(self, func, *args, **kwargs):
        message = self.error
        try:
            self._incref()
        except se.TaskAborted:
            self._doAbort()
            return
        try:
            self._updateState(State.preparing)
            result = None
            code = 0
            try:
                if func:
                    result = self._run(func, *args, **kwargs)
            except se.TaskAborted as e:
                self.log.info("aborting: %s", e)
                code = e.abortedcode
                message = e.value

            if self.aborting():
                self.log.debug("Prepare: aborted: %s", message)
                self._updateResult(code,
                                   "Task prepare failed: %s" % (message,),
                                   "")
                raise self.error

            if self.jobs:
                self.log.debug("Prepare: %s jobs exist, move to acquiring",
                               self.njobs)
                self._updateState(State.acquiring)
                if self.aborting():
                    self.log.error('failed to acquire task %s', self.id)
                    raise self.error
                self.log.debug("returning")
                return dict(uuid=str(self.id))

            self.log.debug("finished: %s", result)
            self._updateResult(0, "OK", result)
            self._updateState(State.finished)
            return result
        finally:
            self._decref()

    @threadlocal_task
    def commit(self, args=None):
        self.log.debug("committing task: %s", self.id)
        try:
            self._incref()
        except se.TaskAborted:
            self._doAbort()
            return
        try:
            self._updateState(State.running)
        finally:
            self._decref()

    @contextmanager
    def abort_callback(self, callback):
        with self._abort_lock:
            if self.aborting():
                aborting = True
            else:
                aborting = False
                self._abort_callbacks.add(callback)

        if aborting:
            callback()

        try:
            yield
        finally:
            with self._abort_lock:
                self._abort_callbacks.discard(callback)

    def _execute_abort_callbacks(self):
        with self._abort_lock:
            self._aborting = True
            abort_callbacks = list(self._abort_callbacks)
        for callback in abort_callbacks:
            try:
                callback()
            except Exception:
                self.log.exception('failure running abort callback')

    def aborting(self):
        return (self._aborting or
                self.state == State.aborting or
                self.state == State.raborting)

    def stop(self, force=False):
        self.log.debug("stopping in state %s (force %s)", self.state, force)
        self._incref(force)
        try:
            if self.state.isDone():
                self.log.debug("Task already stopped (%s), ignoring",
                               self.state)
                return
            elif (self.state.isRecovering() and not force and
                    (self.cleanPolicy == TaskCleanType.auto)):
                self.log.debug("Task (%s) in recovery and force is false, "
                               "ignoring", self.state)
                return

            self._execute_abort_callbacks()
            self._forceAbort = force
        finally:
            self._decref(force)

    @threadlocal_task
    def recover(self, args=None):
        '''
        Do not call this function while the task is actually running. This
        method should only be used to recover the task state after a
        (vdsmd) restart.
        '''
        self.log.debug('(recover): recovering: state %s', self.state)
        try:
            self._incref(force=True)
        except se.TaskAborted:
            self._doAbort(True)
            return
        try:
            if self.isDone():
                self.log.debug('(recover): task is done: state %s',
                               self.state)
                return
            # if we are not during recover, just abort
            if self.state.canAbort():
                self.stop()
            # if we waited for recovery - keep waiting
            elif self.state == State.waitrecover:
                pass
            # if we started the recovery - restart it
            elif (self.state == State.racquiring or
                    self.state == State.recovering):
                self._updateState(State.racquiring, force=True)
            # else we were during failed recovery - abort it
            else:
                self.stop(force=True)
        finally:
            self._decref(force=True)
        self.log.debug('(recover): recovered: state %s', self.state)

    def getState(self):
        return str(self.state)

    def getInfo(self):
        return dict(id=self.id, verb=self.name)

    def deprecated_getStatus(self):
        oReturn = {}
        oReturn["taskID"] = self.id
        oReturn["taskState"] = self.state.DEPRECATED_STATE[self.state.state]
        oReturn["taskResult"] = self.state.DEPRECATED_RESULT[self.state.state]
        oReturn["code"] = self.result.code
        oReturn["message"] = self.result.message
        return oReturn

    def getStatus(self):
        oReturn = {}
        oReturn["state"] = {'code': self.result.code,
                            'message': self.result.message}
        oReturn["task"] = {'id': self.id, 'state': str(self.state)}
        oReturn["result"] = self.result.result
        return oReturn

    def getDetails(self):
        return {
            "id": self.id,
            "verb": self.name,
            "state": str(self.state),
            "code": self.result.code,
            "message": self.result.message,
            "result": self.result.result,
            "tag": self.tag
        }

    def getID(self):
        return self.id

    def getTags(self):
        return self.tag

    def __str__(self):
        return str(self.id)

    # FIXME : Use six.StringIO and enumerate()
    # TODO: Or six.BytesIO?
    def dumpTask(self):
        s = "Task: %s" % self._dump(self, Task.fields)
        i = 0
        for r in self.recoveries:
            s += " Recovery%d: %s" % (i, self._dump(r, Recovery.fields))
            i += 1
        i = 0
        for j in self.jobs:
            s += " Job%d: %s" % (i, self._dump(j, Job.fields))
            i += 1
        return s

    def getExclusiveLock(
            self,
            namespace,
            resName,
            timeout=config.getint('irs', 'task_resource_default_timeout')):
        self.resOwner.acquire(namespace, resName, resourceManager.EXCLUSIVE,
                              timeout)

    def getSharedLock(self, namespace, resName,
                      timeout=config.getint('irs',
                                            'task_resource_default_timeout')):
        self.resOwner.acquire(namespace, resName, resourceManager.SHARED,
                              timeout)
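

# Illustrative sketch, not part of the original module: the lifecycle a
# caller is expected to drive through the Task public interface above.  The
# task manager, store path, job function and names below are placeholders
# invented for the example; only the Task/Job methods come from this file.
def _example_task_lifecycle(taskManager, jobFunction, store):
    task = Task(None, name="exampleVerb")    # empty id -> fresh uuid4
    task.setManager(taskManager)             # anything exposing queue()
    task.setPersistence(store)               # creates <store>/<task.id>

    def prepareFunction():
        # addJob() is only allowed while the task is 'preparing', i.e. from
        # inside the function passed to prepare().  With jobs added,
        # prepare() moves the task to 'acquiring'/'queued' and returns
        # {'uuid': task.id}; without jobs it finishes the task and returns
        # the prepared function's own result.
        task.addJob(Job("exampleJob", jobFunction))

    task.prepare(prepareFunction)
    # Later the task manager picks the queued task up and calls commit(),
    # which moves it to 'running' and executes the jobs sequentially.
    return task.getStatus()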