Example #1
class Request(object):
    """
    Internal request object, don't use directly
    """
    _log = logging.getLogger("storage.ResourceManager.Request")
    namespace = property(lambda self: self._namespace)
    name = property(lambda self: self._name)
    full_name = property(lambda self: "%s.%s" % (self._namespace, self._name))
    lockType = property(lambda self: self._lockType)
    syncRoot = property(lambda self: self._syncRoot)

    def __init__(self, namespace, name, lockType, callback):
        self._syncRoot = threading.RLock()
        self._namespace = namespace
        self._name = name
        self._lockType = lockType
        self._isActive = True
        self._isCanceled = False
        self._doneEvent = threading.Event()
        self._callback = callback
        self.reqID = str(uuid4())
        self._log = SimpleLogAdapter(self._log, {
            "ResName": self.full_name,
            "ReqID": self.reqID
        })

    def cancel(self):
        with self._syncRoot:
            if not self._isActive:
                self._log.warning("Tried to cancel a processed request")
                raise RequestAlreadyProcessedError("Cannot cancel a processed "
                                                   "request")

            self._isActive = False
            self._isCanceled = True

            self._log.debug("Canceled request")
            try:
                self._callback(RequestRef(self), None)
            except Exception:
                self._log.warning("Request callback threw an exception",
                                  exc_info=True)
            self._callback = None
            self._doneEvent.set()

    def _status(self):
        with self._syncRoot:
            if self._isCanceled:
                return "canceled"
            if self._doneEvent.isSet():
                return "granted"
            return "waiting"

    def canceled(self):
        return self._isCanceled

    def grant(self):
        with self._syncRoot:
            if not self._isActive:
                self._log.warning("Tried to grant a processed request")
                raise RequestAlreadyProcessedError("Cannot grant a processed "
                                                   "request")

            self._isActive = False
            self._log.debug("Granted request")
            self._doneEvent.set()

    def emit(self, resource):
        try:
            ref = RequestRef(self)
            self._callback(ref, resource)
        except Exception:
            self._log.warning("Request callback threw an exception",
                              exc_info=True)

    def wait(self, timeout=None):
        return self._doneEvent.wait(timeout)

    def granted(self):
        with self._syncRoot:
            return (not self._isCanceled) and self._doneEvent.isSet()

    def __str__(self):
        return "Request for %s - %s: %s" % (self.full_name, self.lockType,
                                            self._status())
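
The Request class separates completion signaling (grant() and cancel() set a threading.Event that wait() blocks on) from notification (emit() fires the callback with the resource, while cancel() fires it with None). Below is a minimal, stdlib-only sketch of that pattern for illustration; MiniRequest and on_done are hypothetical names, not the vdsm resourceManager API.

import threading

class MiniRequest(object):
    """Illustrative sketch of the grant/cancel/wait pattern used above."""

    def __init__(self, callback):
        self._lock = threading.RLock()   # guards the one-shot state change
        self._done = threading.Event()   # wakes threads blocked in wait()
        self._canceled = False
        self._callback = callback

    def grant(self):
        # Mark the request processed and wake waiters; the callback is
        # fired separately by emit(), mirroring Request.grant()/emit().
        with self._lock:
            if self._done.is_set():
                raise RuntimeError("request already processed")
            self._done.set()

    def emit(self, resource):
        self._callback(self, resource)

    def cancel(self):
        with self._lock:
            if self._done.is_set():
                raise RuntimeError("request already processed")
            self._canceled = True
            self._callback(self, None)   # None signals cancellation
            self._done.set()

    def wait(self, timeout=None):
        return self._done.wait(timeout)

    def granted(self):
        with self._lock:
            return self._done.is_set() and not self._canceled


def on_done(request, resource):
    print("canceled" if resource is None else "granted: %s" % resource)

req = MiniRequest(on_done)
req.grant()
req.emit("fake-resource")
assert req.wait(1.0) and req.granted()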
Example #2
File: task.py Project: EdDev/vdsm
class Task:
    # External Task info
    fields = {
        # field_name: type
        "id": str,
        "name": unicode,
        "tag": unicode,
        "store": unicode,
        "recoveryPolicy": TaskRecoveryType,
        "persistPolicy": TaskPersistType,
        "cleanPolicy": TaskCleanType,
        "priority": TaskPriority,
        "state": State,
        "njobs": int,
        "nrecoveries": int,
        "metadataVersion": int
    }

    log = logging.getLogger('storage.TaskManager.Task')

    def __init__(self, id, name="", tag="",
                 recovery=TaskRecoveryType.none,
                 priority=TaskPriority.low):
        """
        id - Unique ID
        name - human readable name
        persist - persistency type: auto-clean/manual-clean/not-persistent
        """

        if not id:
            id = str(uuid.uuid4())
        self.metadataVersion = TASK_METADATA_VERSION
        self.validateID(id)
        self.lock = threading.Lock()
        self.callbackLock = threading.Lock()
        self.id = str(id)
        self.name = name
        self.tag = tag
        self.priority = priority
        self.recoveryPolicy = recovery
        self.persistPolicy = TaskPersistType.none
        self.cleanPolicy = TaskCleanType.auto
        self.store = None
        self.defaultException = None

        self.state = State(State.init)
        self.result = TaskResult(0, "Task is initializing", "")

        self.resOwner = resourceManager.Owner(proxy(self), raiseonfailure=True)
        self.error = se.TaskAborted("Unknown error encountered")

        self.mng = None
        self._abort_lock = threading.Lock()
        self._abort_callbacks = set()
        self._aborting = False
        self._forceAbort = False
        self.ref = 0

        self.recoveries = []
        self.jobs = []
        self.nrecoveries = 0    # just utility count - used by save/load
        self.njobs = 0          # just utility count - used by save/load

        self.log = SimpleLogAdapter(self.log, {"Task": self.id})

    def __del__(self):
        def finalize(log, owner, taskDir):
            log.warn("Task was autocleaned")
            owner.releaseAll()
            if taskDir is not None:
                getProcPool().fileUtils.cleanupdir(taskDir)

        if not self.state.isDone():
            taskDir = None
            if (self.cleanPolicy == TaskCleanType.auto and
                    self.store is not None):
                taskDir = os.path.join(self.store, self.id)
            t = concurrent.thread(
                finalize,
                args=(self.log, self.resOwner, taskDir),
                name="task/" + self.id[:8])
            t.start()

    def _done(self):
        self.resOwner.releaseAll()
        if self.cleanPolicy == TaskCleanType.auto:
            self.clean()

    def __state_preparing(self, fromState):
        pass

    def __state_blocked(self, fromState):
        pass

    def __state_acquiring(self, fromState):
        if self.resOwner.requestsGranted():
            self._updateState(State.queued)

    def __state_queued(self, fromState):
        try:
            self.mng.queue(self)
        except Exception as e:
            self._setError(e)
            self.stop()

    def __state_running(self, fromState):
        self._runJobs()

    def __state_finished(self, fromState):
        self._done()

    def __state_aborting(self, fromState):
        if self.ref > 1:
            return
        self.log.debug("_aborting: recover policy %s", self.recoveryPolicy)
        if self.recoveryPolicy == TaskRecoveryType.auto:
            self._updateState(State.racquiring)
        elif self.recoveryPolicy == TaskRecoveryType.none:
            self._updateState(State.failed)
        else:
            self._updateState(State.waitrecover)

    def __state_waitrecover(self, fromState):
        pass

    def __state_racquiring(self, fromState):
        if self.resOwner.requestsGranted():
            self._updateState(State.recovering)

    def __state_recovering(self, fromState):
        self._recover()

    def __state_raborting(self, fromState):
        if self.ref == 1:
            self._updateState(State.failed)
        else:
            self.log.warn("State was change to 'raborting' "
                          "when ref was not 1.")

    def __state_recovered(self, fromState):
        self._done()

    def __state_failed(self, fromState):
        self._done()

    def __state_cleaning(self, fromState):
        pass

    def _updateState(self, state, force=False):
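        # If an abort was requested, replace the requested target state with
        # aborting (or raborting while recovering, unless we already reached
        # 'recovered'); the _aborting flag is consumed so this happens once.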
        fromState = self.state
        requestedState = state
        if self._aborting:
            if self.state.canAbort():
                state = State.aborting
            elif self.state.canAbortRecovery() and state != State.recovered:
                state = State.raborting
        self._aborting = False
        if requestedState == state:
            self.log.debug("moving from state %s -> state %s",
                           fromState, state)
        else:
            self.log.debug("moving from state %s -> state %s instead of %s",
                           fromState, state, requestedState)

        self.state.moveto(state, force)
        if self.persistPolicy == TaskPersistType.auto:
            try:
                self.persist()
            except Exception:
                self.log.warning("Task._updateState: failed persisting task"
                                 " %s", self.id, exc_info=True)

        fn = getattr(self, "_Task__state_%s" % state)
        fn(fromState)

    def _updateResult(self, code, message, result):
        self.result.result = result
        self.result.code = code
        self.result.message = message

    @classmethod
    def validateID(cls, taskID):
        if not taskID or "." in taskID:
            raise se.InvalidParameterException("taskID", taskID)

    @classmethod
    def _loadMetaFile(cls, filename, obj, fields):
        try:
            for line in getProcPool().readLines(filename):
                # process current line
                line = line.encode('utf8')
                if line.find(KEY_SEPARATOR) < 0:
                    continue
                parts = line.split(KEY_SEPARATOR)
                if len(parts) != 2:
                    cls.log.warning("Task._loadMetaFile: %s - ignoring line"
                                    " '%s'", filename, line)
                    continue

                field = _eq_decode(parts[0].strip())
                value = _eq_decode(parts[1].strip())
                if field not in fields:
                    cls.log.warning("Task._loadMetaFile: %s - ignoring field"
                                    " %s in line '%s'", filename, field, line)
                    continue

                ftype = fields[field]
                setattr(obj, field, ftype(value))
        except Exception:
            cls.log.error("Unexpected error", exc_info=True)
            raise se.TaskMetaDataLoadError(filename)

    @classmethod
    def _dump(cls, obj, fields):
        lines = []
        for field in fields:
            try:
                value = unicode(getattr(obj, field))
            except AttributeError:
                cls.log.warning("object %s field %s not found" %
                                (obj, field), exc_info=True)
            else:
                try:
                    field = _eq_encode(field)
                    value = _eq_encode(value)
                except ValueError as e:
                    cls.log.warning("Object %s: Cannot encode field %s or "
                                    "value %s. Skipping field. %s",
                                    obj, field, value, e)
                else:
                    lines.append("%s %s %s" % (field, KEY_SEPARATOR, value))
        return lines

    @classmethod
    def _saveMetaFile(cls, filename, obj, fields):
        try:
            getProcPool().writeLines(filename,
                                     [l.encode('utf8') + "\n"
                                      for l in cls._dump(obj, fields)])
        except Exception:
            cls.log.error("Unexpected error", exc_info=True)
            raise se.TaskMetaDataSaveError(filename)

    def _loadTaskMetaFile(self, taskDir):
        taskFile = os.path.join(taskDir, self.id + TASK_EXT)
        self._loadMetaFile(taskFile, self, Task.fields)

    def _saveTaskMetaFile(self, taskDir):
        taskFile = os.path.join(taskDir, self.id + TASK_EXT)
        self._saveMetaFile(taskFile, self, Task.fields)

    def _loadJobMetaFile(self, taskDir, n):
        taskFile = os.path.join(taskDir, self.id + JOB_EXT + NUM_SEP + str(n))
        self._loadMetaFile(taskFile, self.jobs[n], Job.fields)

    def _saveJobMetaFile(self, taskDir, n):
        taskFile = os.path.join(taskDir, self.id + JOB_EXT + NUM_SEP + str(n))
        self._saveMetaFile(taskFile, self.jobs[n], Job.fields)

    def _loadRecoveryMetaFile(self, taskDir, n):
        taskFile = os.path.join(taskDir,
                                self.id + RECOVER_EXT + NUM_SEP + str(n))
        self._loadMetaFile(taskFile, self.recoveries[n], Recovery.fields)

    def _saveRecoveryMetaFile(self, taskDir, n):
        taskFile = os.path.join(taskDir,
                                self.id + RECOVER_EXT + NUM_SEP + str(n))
        self._saveMetaFile(taskFile, self.recoveries[n], Recovery.fields)

    def _loadTaskResultMetaFile(self, taskDir):
        taskFile = os.path.join(taskDir, self.id + RESULT_EXT)
        self._loadMetaFile(taskFile, self.result, TaskResult.fields)

    def _saveTaskResultMetaFile(self, taskDir):
        taskFile = os.path.join(taskDir, self.id + RESULT_EXT)
        self._saveMetaFile(taskFile, self.result, TaskResult.fields)

    def _getResourcesKeyList(self, taskDir):
        keys = []
        for path in getProcPool().glob.glob(os.path.join(taskDir,
                                                         "*" + RESOURCE_EXT)):
            filename = os.path.basename(path)
            keys.append(filename[:filename.rfind(RESOURCE_EXT)])
        return keys

    def _load(self, storPath, ext=""):
        self.log.debug("%s: load from %s, ext '%s'", self, storPath, ext)
        if self.state != State.init:
            raise se.TaskMetaDataLoadError("task %s - can't load self: "
                                           "not in init state" % self)
        taskDir = os.path.join(storPath, str(self.id) + str(ext))
        if not getProcPool().os.path.exists(taskDir):
            raise se.TaskDirError("load: no such task dir '%s'" % taskDir)
        oldid = self.id
        self._loadTaskMetaFile(taskDir)
        if self.id != oldid:
            raise se.TaskMetaDataLoadError("task %s: loaded file does not"
                                           " match id (%s != %s)" %
                                           (self, self.id, oldid))
        if self.state == State.finished:
            self._loadTaskResultMetaFile(taskDir)
        for jn in range(self.njobs):
            self.jobs.append(Job("load", None))
            self._loadJobMetaFile(taskDir, jn)
            self.jobs[jn].setOwnerTask(self)
        for rn in range(self.nrecoveries):
            self.recoveries.append(Recovery("load", "load",
                                            "load", "load", ""))
            self._loadRecoveryMetaFile(taskDir, rn)
            self.recoveries[rn].setOwnerTask(self)

    def _save(self, storPath):
        origTaskDir = os.path.join(storPath, self.id)
        if not getProcPool().os.path.exists(origTaskDir):
            raise se.TaskDirError("_save: no such task dir '%s'" % origTaskDir)
        taskDir = os.path.join(storPath, self.id + TEMP_EXT)
        self.log.debug("_save: orig %s temp %s", origTaskDir, taskDir)
        if getProcPool().os.path.exists(taskDir):
            getProcPool().fileUtils.cleanupdir(taskDir)
        getProcPool().os.mkdir(taskDir)
        try:
            self.njobs = len(self.jobs)
            self.nrecoveries = len(self.recoveries)
            self._saveTaskMetaFile(taskDir)
            if self.state == State.finished:
                self._saveTaskResultMetaFile(taskDir)
            for jn in range(self.njobs):
                self._saveJobMetaFile(taskDir, jn)
            for rn in range(self.nrecoveries):
                self._saveRecoveryMetaFile(taskDir, rn)
        except Exception as e:
            self.log.error("Unexpected error", exc_info=True)
            try:
                getProcPool().fileUtils.cleanupdir(taskDir)
            except:
                self.log.warning("can't remove temp taskdir %s" % taskDir)
            raise se.TaskPersistError("%s persist failed: %s" % (self, e))
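        # Commit the new metadata with a rename dance: move the current dir
        # aside as a backup, move the temp dir into place, drop the backup,
        # and fsync the final path so the swap is durable.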
        # Make sure backup dir doesn't exist
        getProcPool().fileUtils.cleanupdir(origTaskDir + BACKUP_EXT)
        getProcPool().os.rename(origTaskDir, origTaskDir + BACKUP_EXT)
        getProcPool().os.rename(taskDir, origTaskDir)
        getProcPool().fileUtils.cleanupdir(origTaskDir + BACKUP_EXT)
        getProcPool().fileUtils.fsyncPath(origTaskDir)

    def _clean(self, storPath):
        taskDir = os.path.join(storPath, self.id)
        getProcPool().fileUtils.cleanupdir(taskDir)

    def _recoverDone(self):
        # protect against races with stop/abort
        self.log.debug("Recover Done: state %s", self.state)
        while True:
            try:
                if self.state == State.recovering:
                    self._updateState(State.recovered)
                elif self.state == State.raborting:
                    self._updateState(State.failed)
                return
            except se.TaskStateTransitionError:
                self.log.error("Unexpected error", exc_info=True)

    def _recover(self):
        self.log.debug("_recover")
        if not self.state == State.recovering:
            raise se.TaskStateError("%s: _recover in state %s" %
                                    (self, self.state))
        try:
            while self.state == State.recovering:
                rec = self.popRecovery()
                self.log.debug("running recovery %s", rec)
                if not rec:
                    break
                self._run(rec.run)
        except Exception as e:
            self.log.warning("task %s: recovery failed: %s",
                             self, e, exc_info=True)
            # protect against races with stop/abort
            try:
                if self.state == State.recovering:
                    self._updateState(State.raborting)
            except se.TaskStateTransitionError:
                pass
        self._recoverDone()

    def resourceAcquired(self, namespace, resource, locktype):
        # Callback from resourceManager.Owner. May be called by another thread.
        self._incref()
        try:
            self.callbackLock.acquire()
            try:
                self.log.debug("_resourcesAcquired: %s.%s (%s)",
                               namespace, resource, locktype)
                if self.state == State.preparing:
                    return
                if self.state == State.acquiring:
                    self._updateState(State.acquiring)
                elif self.state == State.racquiring:
                    self._updateState(State.racquiring)
                elif self.state == State.blocked:
                    self._updateState(State.preparing)
                elif (self.state == State.aborting or
                      self.state == State.raborting):
                    self.log.debug("resource %s.%s acquired while in state %s",
                                   namespace, resource, self.state)
                else:
                    raise se.TaskStateError("acquire is not allowed in state"
                                            " %s" % self.state)
            finally:
                self.callbackLock.release()
        finally:
            self._decref()

    def resourceRegistered(self, namespace, resource, locktype):
        self._incref()
        try:
            self.callbackLock.acquire()
            try:
                # Callback from resourceManager.Owner. May be called
                # by another thread.
                self.log.debug("_resourcesAcquired: %s.%s (%s)",
                               namespace, resource, locktype)
                # Protect against races with stop/abort
                if self.state == State.preparing:
                    self._updateState(State.blocked)
            finally:
                self.callbackLock.release()
        finally:
            self._decref()

    def _setError(self, e=se.TaskAborted("Unknown error encountered")):
        self.log.error("Unexpected error", exc_info=True)
        self.error = e

    def _run(self, fn, *args, **kargs):
        code = 100
        message = "Unknown Error"
        try:
            return fn(*args, **kargs)
        except se.StorageException as e:
            code = e.code
            message = e.message
            self._setError(e)
        except Exception as e:
            message = unicode(e)
            self._setError(e)
        except:
            self._setError()

        self.log.debug("Task._run: %s %s %s failed - stopping task",
                       self, args, kargs)
        self.stop()
        raise se.TaskAborted(message, code)

    def _runJobs(self):
        result = ""
        code = 100
        message = "Unknown Error"
        i = 0
        j = None
        try:
            if self.aborting():
                raise se.TaskAborted("shutting down")
            if not self.state == State.running:
                raise se.TaskStateError("%s: can't run Jobs in state %s" %
                                        (self, self.state))
            # for now: result is the last job result, jobs are run sequentially
            for j in self.jobs:
                if self.aborting():
                    raise se.TaskAborted("shutting down")
                self.log.debug("Task.run: running job %s: %s" % (i, j))
                self._updateResult(
                    0, 'running job {0} of {1}'.format(i + 1, len(self.jobs)),
                    '')
                result = self._run(j.run)
                if result is None:
                    result = ""
                i += 1
            j = None
            self._updateResult(0, "%s jobs completed successfully" % i, result)
            self._updateState(State.finished)
            self.log.debug('Task.run: exit - success: result %s' % result)
            return result
        except se.TaskAborted as e:
            self.log.debug("aborting: %s", e)
            message = e.value
            code = e.abortedcode
            if not self.aborting():
                self.log.error("Aborted exception but not in aborting state")
                raise
        self._updateResult(code, message, "")

    def _doAbort(self, force=False):
        self.log.debug("Task._doAbort: force %s" % force)
        self.lock.acquire()
        # Am I really the last?
        if self.ref != 0:
            self.lock.release()
            return
        self.ref += 1
        self.lock.release()
        try:
            try:
                if (not self.state.canAbort() and
                        (force and not self.state.canAbortRecovery())):
                    self.log.warning("Task._doAbort %s: ignoring - "
                                     "at state %s", self, self.state)
                    return
                self.resOwner.cancelAll()
                if self.state.canAbort():
                    self._updateState(State.aborting)
                else:
                    self._updateState(State.raborting)
            except se.TaskAborted:
                self._updateState(State.failed)
        finally:
            self.lock.acquire()
            self.ref -= 1
            self.lock.release()
            # If something horrible went wrong, just fail the task.
            if not self.state.isDone():
                self.log.warn("Task exited in a non-terminal state. "
                              "Setting task as failed.")
                self._updateState(State.failed)

    def _doRecover(self):
        self.lock.acquire()
        # Am I really the last?
        if self.ref != 0:
            self.lock.release()
            raise se.TaskHasRefs(unicode(self))
        self.ref += 1
        self.lock.release()
        try:
            self._updateState(State.racquiring)
        finally:
            self.lock.acquire()
            self.ref -= 1
            self.lock.release()

    def _incref(self, force=False):
        self.lock.acquire()
        try:
            if self.aborting() and (self._forceAbort or not force):
                raise se.TaskAborted(unicode(self))

            self.ref += 1
            ref = self.ref
            return ref
        finally:
            self.lock.release()

    def _decref(self, force=False):
        self.lock.acquire()
        self.ref -= 1
        ref = self.ref
        self.lock.release()

        self.log.debug("ref %d aborting %s", ref, self.aborting())
        if ref == 0 and self.aborting():
            self._doAbort(force)
        return ref

    ##########################################################################
    # Public Interface                                                       #
    ##########################################################################

    def setDefaultException(self, exceptionObj):
        # defaultException must have response method
        if exceptionObj and not hasattr(exceptionObj, "response"):
            raise se.InvalidDefaultExceptionException(unicode(exceptionObj))
        self.defaultException = exceptionObj

    def setTag(self, tag):
        if KEY_SEPARATOR in tag:
            raise ValueError("tag cannot include %s character" % KEY_SEPARATOR)
        self.tag = unicode(tag)

    def isDone(self):
        return self.state.isDone()

    def addJob(self, job):
        """
        Add async job to the task. Assumes all resources are acquired
        or registered.
        """
        if not self.mng:
            raise se.UnmanagedTask(unicode(self))
        if not isinstance(job, Job):
            raise TypeError("Job param %s(%s) must be Job object" %
                            (repr(job), type(job)))
        if self.state != State.preparing:
            raise Exception("Task.addJob: can't add job in non preparing state"
                            " (%s)" % self.state)
        if not job.name:
            raise ValueError("Task.addJob: name is required")
        name = job.name
        for j in self.jobs:
            if name == j.name:
                raise ValueError("addJob: name '%s' must be unique" % (name))
        job.setOwnerTask(self)
        self.jobs.append(job)
        self.njobs = len(self.jobs)

    def clean(self):
        if not self.store:
            return
        if not self.isDone():
            raise se.TaskStateError("can't clean in state %s" % self.state)
        self._clean(self.store)

    def pushRecovery(self, recovery):
        """
        Add recovery "job" to the task. Recoveries are committed in FILO order.
        Assumes that all required resources are acquired or registered.
        """
        if not isinstance(recovery, Recovery):
            raise TypeError("recovery param %s(%s) must be Recovery object" %
                            (repr(recovery), type(recovery)))
        if not recovery.name:
            raise ValueError("pushRecovery: name is required")
        name = recovery.name
        for r in self.recoveries:
            if name == r.name:
                raise ValueError("pushRecovery: name '%s' must be unique" %
                                 (name))
        recovery.setOwnerTask(self)
        self.recoveries.append(recovery)
        self.persist()

    def replaceRecoveries(self, recovery):
        if not isinstance(recovery, Recovery):
            raise TypeError("recovery param %s(%s) must be Recovery object" %
                            (repr(recovery), type(recovery)))
        if not recovery.name:
            raise ValueError("replaceRecoveries: name is required")
        recovery.setOwnerTask(self)
        rec = Recovery('stubName', 'stubMod', 'stubObj', 'stubFunc', [])
        while (rec and (rec.name != ROLLBACK_SENTINEL)):
            rec = self.popRecovery()
        self.recoveries.append(recovery)
        self.persist()

    def popRecovery(self):
        if self.recoveries:
            return self.recoveries.pop()

    def clearRecoveries(self):
        self.recoveries = []
        self.persist()

    def setManager(self, manager):
        # If need be, refactor out to "validateManager" method
        if not hasattr(manager, "queue"):
            raise se.InvalidTaskMng(unicode(manager))
        self.mng = manager

    def setCleanPolicy(self, clean):
        self.cleanPolicy = TaskCleanType(clean)

    def setPersistence(self, store,
                       persistPolicy=TaskPersistType.auto,
                       cleanPolicy=TaskCleanType.auto):
        self.persistPolicy = TaskPersistType(persistPolicy)
        self.store = store
        self.setCleanPolicy(cleanPolicy)
        if self.persistPolicy != TaskPersistType.none and not self.store:
            raise se.TaskPersistError("no store defined")
        taskDir = os.path.join(self.store, self.id)
        try:
            getProcPool().fileUtils.createdir(taskDir)
        except Exception as e:
            self.log.error("Unexpected error", exc_info=True)
            raise se.TaskPersistError("%s: cannot access/create taskdir"
                                      " %s: %s" % (self, taskDir, e))
        if (self.persistPolicy == TaskPersistType.auto and
                self.state != State.init):
            self.persist()

    def setRecoveryPolicy(self, clean):
        self.recoveryPolicy = TaskRecoveryType(clean)

    def rollback(self):
        self.log.debug('(rollback): enter')
        if self.recoveryPolicy == TaskRecoveryType.none:
            self.log.debug("rollback is skipped")
            return
        if not self.isDone():
            raise se.TaskNotFinished("can't rollback in state %s" % self.state)
        self._doRecover()
        self.log.debug('(rollback): exit')

    def persist(self):
        if self.persistPolicy == TaskPersistType.none:
            return
        if not self.store:
            raise se.TaskPersistError("no store defined")
        if self.state == State.init:
            raise se.TaskStateError("can't persist in state %s" % self.state)
        self._save(self.store)

    @classmethod
    def loadTask(cls, store, taskid):
        t = Task(taskid)
        if getProcPool().os.path.exists(os.path.join(store, taskid)):
            ext = ""
        # TBD: is this the correct order (temp < backup) + should temp
        # be considered at all?
        elif getProcPool().os.path.exists(os.path.join(store,
                                                       taskid + TEMP_EXT)):
            ext = TEMP_EXT
        elif getProcPool().os.path.exists(os.path.join(store,
                                                       taskid + BACKUP_EXT)):
            ext = BACKUP_EXT
        else:
            raise se.TaskDirError("loadTask: no such task dir '%s/%s'" %
                                  (store, taskid))
        t._load(store, ext)
        return t

    @threadlocal_task
    def prepare(self, func, *args, **kwargs):
        message = self.error
        try:
            self._incref()
        except se.TaskAborted:
            self._doAbort()
            return
        try:
            self._updateState(State.preparing)
            result = None
            code = 0
            try:
                if func:
                    result = self._run(func, *args, **kwargs)
            except se.TaskAborted as e:
                self.log.info("aborting: %s", e)
                code = e.abortedcode
                message = e.value

            if self.aborting():
                self.log.debug("Prepare: aborted: %s", message)
                self._updateResult(code, "Task prepare failed: %s" %
                                   (message,), "")
                raise self.error

            if self.jobs:
                self.log.debug("Prepare: %s jobs exist, move to acquiring",
                               self.njobs)
                self._updateState(State.acquiring)
                if self.aborting():
                    self.log.error('failed to acquire task %s', self.id)
                    raise self.error
                self.log.debug("returning")
                return dict(uuid=str(self.id))

            self.log.debug("finished: %s", result)
            self._updateResult(0, "OK", result)
            self._updateState(State.finished)
            return result
        finally:
            self._decref()

    @threadlocal_task
    def commit(self, args=None):
        self.log.debug("committing task: %s", self.id)
        try:
            self._incref()
        except se.TaskAborted:
            self._doAbort()
            return
        try:
            self._updateState(State.running)
        finally:
            self._decref()

    @contextmanager
    def abort_callback(self, callback):
        with self._abort_lock:
            if self.aborting():
                aborting = True
            else:
                aborting = False
                self._abort_callbacks.add(callback)

        if aborting:
            callback()

        try:
            yield
        finally:
            with self._abort_lock:
                self._abort_callbacks.discard(callback)

    def _execute_abort_callbacks(self):
        with self._abort_lock:
            self._aborting = True
            abort_callbacks = list(self._abort_callbacks)

        for callback in abort_callbacks:
            try:
                callback()
            except Exception:
                self.log.exception('failure running abort callback')

    def aborting(self):
        return (self._aborting or
                self.state == State.aborting or
                self.state == State.raborting)

    def stop(self, force=False):
        self.log.debug("stopping in state %s (force %s)", self.state, force)
        self._incref(force)
        try:
            if self.state.isDone():
                self.log.debug("Task already stopped (%s), ignoring",
                               self.state)
                return
            elif (self.state.isRecovering() and
                  not force and
                  (self.cleanPolicy == TaskCleanType.auto)):
                self.log.debug("Task (%s) in recovery and force is false, "
                               "ignoring", self.state)
                return

            self._execute_abort_callbacks()
            self._forceAbort = force
        finally:
            self._decref(force)

    @threadlocal_task
    def recover(self, args=None):
        ''' Do not call this function while the task is actually running. This
            method should only be used to recover task state after a
            (vdsmd) restart.
        '''
        self.log.debug('(recover): recovering: state %s', self.state)
        try:
            self._incref(force=True)
        except se.TaskAborted:
            self._doAbort(True)
            return
        try:
            if self.isDone():
                self.log.debug('(recover): task is done: state %s', self.state)
                return
            # if we are not during recover, just abort
            if self.state.canAbort():
                self.stop()
            # if we waited for recovery - keep waiting
            elif self.state == State.waitrecover:
                pass
            # if we started the recovery - restart it
            elif (self.state == State.racquiring or
                  self.state == State.recovering):
                self._updateState(State.racquiring, force=True)
            # else we were during failed recovery - abort it
            else:
                self.stop(force=True)
        finally:
            self._decref(force=True)
        self.log.debug('(recover): recovered: state %s', self.state)

    def getState(self):
        return str(self.state)

    def getInfo(self):
        return dict(id=self.id, verb=self.name)

    def deprecated_getStatus(self):
        oReturn = {}
        oReturn["taskID"] = self.id
        oReturn["taskState"] = self.state.DEPRECATED_STATE[self.state.state]
        oReturn["taskResult"] = self.state.DEPRECATED_RESULT[self.state.state]
        oReturn["code"] = self.result.code
        oReturn["message"] = self.result.message
        return oReturn

    def getStatus(self):
        oReturn = {}
        oReturn["state"] = {'code': self.result.code,
                            'message': self.result.message}
        oReturn["task"] = {'id': self.id, 'state': str(self.state)}
        oReturn["result"] = self.result.result
        return oReturn

    def getDetails(self):
        return {
            "id": self.id,
            "verb": self.name,
            "state": str(self.state),
            "code": self.result.code,
            "message": self.result.message,
            "result": self.result.result,
            "tag": self.tag
        }

    def getID(self):
        return self.id

    def getTags(self):
        return self.tag

    def __str__(self):
        return str(self.id)

    # FIXME : Use StringIO and enumerate()
    def dumpTask(self):
        s = "Task: %s" % self._dump(self, Task.fields)
        i = 0
        for r in self.recoveries:
            s += " Recovery%d: %s" % (i, self._dump(r, Recovery.fields))
            i += 1
        i = 0
        for j in self.jobs:
            s += " Job%d: %s" % (i, self._dump(j, Job.fields))
            i += 1
        return s

    def getExclusiveLock(
        self,
        namespace,
        resName,
        timeout=config.getint('irs',
                              'task_resource_default_timeout')):
        self.resOwner.acquire(namespace,
                              resName,
                              resourceManager.EXCLUSIVE,
                              timeout)

    def getSharedLock(self,
                      namespace,
                      resName,
                      timeout=config.getint('irs',
                                            'task_resource_default_timeout')):
        self.resOwner.acquire(namespace,
                              resName,
                              resourceManager.SHARED,
                              timeout)
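
The per-state handlers above (__state_preparing, __state_running, and so on) are resolved dynamically in _updateState through Python's name mangling: a method named __state_foo defined inside class Task is stored as _Task__state_foo, which is exactly what getattr(self, "_Task__state_%s" % state) looks up. A tiny self-contained sketch of that dispatch trick, with hypothetical class and state names:

class Machine(object):
    # Double-underscore methods are name-mangled to _Machine__state_<name>,
    # so they can be fetched by building the mangled name from a string.
    def __state_init(self, from_state):
        print("entered init from %s" % from_state)

    def __state_running(self, from_state):
        print("entered running from %s" % from_state)

    def move(self, from_state, to_state):
        handler = getattr(self, "_Machine__state_%s" % to_state)
        handler(from_state)


m = Machine()
m.move("init", "running")   # prints: entered running from init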
Example #3
File: task.py Project: ZhangNatural/vdsm
class Task:
    # External Task info
    fields = {
        # field_name: type
        "id": str,
        "name": six.text_type,
        "tag": six.text_type,
        "store": six.text_type,
        "recoveryPolicy": TaskRecoveryType,
        "persistPolicy": TaskPersistType,
        "cleanPolicy": TaskCleanType,
        "priority": TaskPriority,
        "state": State,
        "njobs": int,
        "nrecoveries": int,
        "metadataVersion": int
    }

    log = logging.getLogger('storage.TaskManager.Task')

    def __init__(self, id, name="", tag="",
                 recovery=TaskRecoveryType.none,
                 priority=TaskPriority.low):
        """
        id - Unique ID
        name - human readable name
        persist - persistency type: auto-clean/manual-clean/not-persistent
        """

        if not id:
            id = str(uuid.uuid4())
        self.metadataVersion = TASK_METADATA_VERSION
        self.validateID(id)
        self.lock = threading.Lock()
        self.callbackLock = threading.Lock()
        self.id = str(id)
        self.name = name
        self.tag = tag
        self.priority = priority
        self.recoveryPolicy = recovery
        self.persistPolicy = TaskPersistType.none
        self.cleanPolicy = TaskCleanType.auto
        self.store = None
        self.defaultException = None

        self.state = State(State.init)
        self.result = TaskResult(0, "Task is initializing", "")

        self.resOwner = resourceManager.Owner(proxy(self), raiseonfailure=True)
        self.error = se.TaskAborted("Unknown error encountered")

        self.mng = None
        self._abort_lock = threading.Lock()
        self._abort_callbacks = set()
        self._aborting = False
        self._forceAbort = False
        self.ref = 0

        self.recoveries = []
        self.jobs = []
        self.nrecoveries = 0    # just utility count - used by save/load
        self.njobs = 0          # just utility count - used by save/load

        self.log = SimpleLogAdapter(self.log, {"Task": self.id})

    def __del__(self):
        def finalize(log, owner, taskDir):
            log.warn("Task was autocleaned")
            owner.releaseAll()
            if taskDir is not None:
                getProcPool().fileUtils.cleanupdir(taskDir)

        if not self.state.isDone():
            taskDir = None
            if (self.cleanPolicy == TaskCleanType.auto and
                    self.store is not None):
                taskDir = os.path.join(self.store, self.id)
            t = concurrent.thread(
                finalize,
                args=(self.log, self.resOwner, taskDir),
                name="task/" + self.id[:8])
            t.start()

    def _done(self):
        self.resOwner.releaseAll()
        if self.cleanPolicy == TaskCleanType.auto:
            self.clean()

    def __state_preparing(self, fromState):
        pass

    def __state_blocked(self, fromState):
        pass

    def __state_acquiring(self, fromState):
        if self.resOwner.requestsGranted():
            self._updateState(State.queued)

    def __state_queued(self, fromState):
        try:
            self.mng.queue(self)
        except Exception as e:
            self._setError(e)
            self.stop()

    def __state_running(self, fromState):
        self._runJobs()

    def __state_finished(self, fromState):
        self._done()

    def __state_aborting(self, fromState):
        if self.ref > 1:
            return
        self.log.debug("_aborting: recover policy %s", self.recoveryPolicy)
        if self.recoveryPolicy == TaskRecoveryType.auto:
            self._updateState(State.racquiring)
        elif self.recoveryPolicy == TaskRecoveryType.none:
            self._updateState(State.failed)
        else:
            self._updateState(State.waitrecover)

    def __state_waitrecover(self, fromState):
        pass

    def __state_racquiring(self, fromState):
        if self.resOwner.requestsGranted():
            self._updateState(State.recovering)

    def __state_recovering(self, fromState):
        self._recover()

    def __state_raborting(self, fromState):
        if self.ref == 1:
            self._updateState(State.failed)
        else:
            self.log.warn("State was change to 'raborting' "
                          "when ref was not 1.")

    def __state_recovered(self, fromState):
        self._done()

    def __state_failed(self, fromState):
        self._done()

    def __state_cleaning(self, fromState):
        pass

    def _updateState(self, state, force=False):
        fromState = self.state
        requestedState = state
        if self._aborting:
            if self.state.canAbort():
                state = State.aborting
            elif self.state.canAbortRecovery() and state != State.recovered:
                state = State.raborting
        self._aborting = False
        if requestedState == state:
            self.log.debug("moving from state %s -> state %s",
                           fromState, state)
        else:
            self.log.debug("moving from state %s -> state %s instead of %s",
                           fromState, state, requestedState)

        self.state.moveto(state, force)
        if self.persistPolicy == TaskPersistType.auto:
            try:
                self.persist()
            except Exception:
                self.log.warning("Task._updateState: failed persisting task"
                                 " %s", self.id, exc_info=True)

        fn = getattr(self, "_Task__state_%s" % state)
        fn(fromState)

    def _updateResult(self, code, message, result):
        self.result.result = result
        self.result.code = code
        self.result.message = message

    @classmethod
    def validateID(cls, taskID):
        if not taskID or "." in taskID:
            raise se.InvalidParameterException("taskID", taskID)

    @classmethod
    def _loadMetaFile(cls, filename, obj, fields):
        try:
            for line in getProcPool().readLines(filename):
                # process current line
                line = line.encode('utf8')
                if line.find(KEY_SEPARATOR) < 0:
                    continue
                parts = line.split(KEY_SEPARATOR)
                if len(parts) != 2:
                    cls.log.warning("Task._loadMetaFile: %s - ignoring line"
                                    " '%s'", filename, line)
                    continue

                field = _eq_decode(parts[0].strip())
                value = _eq_decode(parts[1].strip())
                if field not in fields:
                    cls.log.warning("Task._loadMetaFile: %s - ignoring field"
                                    " %s in line '%s'", filename, field, line)
                    continue

                ftype = fields[field]
                setattr(obj, field, ftype(value))
        except Exception:
            cls.log.error("Unexpected error", exc_info=True)
            raise se.TaskMetaDataLoadError(filename)

    @classmethod
    def _dump(cls, obj, fields):
        lines = []
        for field in fields:
            try:
                value = six.text_type(getattr(obj, field))
            except AttributeError:
                cls.log.warning("object %s field %s not found" %
                                (obj, field), exc_info=True)
            else:
                try:
                    field = _eq_encode(field)
                    value = _eq_encode(value)
                except ValueError as e:
                    cls.log.warning("Object %s: Cannot encode field %s or "
                                    "value %s. Skipping field. %s",
                                    obj, field, value, e)
                else:
                    lines.append("%s %s %s" % (field, KEY_SEPARATOR, value))
        return lines

    @classmethod
    def _saveMetaFile(cls, filename, obj, fields):
        try:
            getProcPool().writeLines(filename,
                                     [l.encode('utf-8') + b"\n"
                                      for l in cls._dump(obj, fields)])
        except Exception:
            cls.log.error("Unexpected error", exc_info=True)
            raise se.TaskMetaDataSaveError(filename)

    def _loadTaskMetaFile(self, taskDir):
        taskFile = os.path.join(taskDir, self.id + TASK_EXT)
        self._loadMetaFile(taskFile, self, Task.fields)

    def _saveTaskMetaFile(self, taskDir):
        taskFile = os.path.join(taskDir, self.id + TASK_EXT)
        self._saveMetaFile(taskFile, self, Task.fields)

    def _loadJobMetaFile(self, taskDir, n):
        taskFile = os.path.join(taskDir, self.id + JOB_EXT + NUM_SEP + str(n))
        self._loadMetaFile(taskFile, self.jobs[n], Job.fields)

    def _saveJobMetaFile(self, taskDir, n):
        taskFile = os.path.join(taskDir, self.id + JOB_EXT + NUM_SEP + str(n))
        self._saveMetaFile(taskFile, self.jobs[n], Job.fields)

    def _loadRecoveryMetaFile(self, taskDir, n):
        taskFile = os.path.join(taskDir,
                                self.id + RECOVER_EXT + NUM_SEP + str(n))
        self._loadMetaFile(taskFile, self.recoveries[n], Recovery.fields)

    def _saveRecoveryMetaFile(self, taskDir, n):
        taskFile = os.path.join(taskDir,
                                self.id + RECOVER_EXT + NUM_SEP + str(n))
        self._saveMetaFile(taskFile, self.recoveries[n], Recovery.fields)

    def _loadTaskResultMetaFile(self, taskDir):
        taskFile = os.path.join(taskDir, self.id + RESULT_EXT)
        self._loadMetaFile(taskFile, self.result, TaskResult.fields)

    def _saveTaskResultMetaFile(self, taskDir):
        taskFile = os.path.join(taskDir, self.id + RESULT_EXT)
        self._saveMetaFile(taskFile, self.result, TaskResult.fields)

    def _getResourcesKeyList(self, taskDir):
        keys = []
        for path in getProcPool().glob.glob(os.path.join(taskDir,
                                                         "*" + RESOURCE_EXT)):
            filename = os.path.basename(path)
            keys.append(filename[:filename.rfind(RESOURCE_EXT)])
        return keys

    def _load(self, storPath, ext=""):
        self.log.debug("%s: load from %s, ext '%s'", self, storPath, ext)
        if self.state != State.init:
            raise se.TaskMetaDataLoadError("task %s - can't load self: "
                                           "not in init state" % self)
        taskDir = os.path.join(storPath, str(self.id) + str(ext))
        if not getProcPool().os.path.exists(taskDir):
            raise se.TaskDirError("load: no such task dir '%s'" % taskDir)
        oldid = self.id
        self._loadTaskMetaFile(taskDir)
        if self.id != oldid:
            raise se.TaskMetaDataLoadError("task %s: loaded file does not"
                                           " match id (%s != %s)" %
                                           (self, self.id, oldid))
        if self.state == State.finished:
            self._loadTaskResultMetaFile(taskDir)
        for jn in range(self.njobs):
            self.jobs.append(Job("load", None))
            self._loadJobMetaFile(taskDir, jn)
            self.jobs[jn].setOwnerTask(self)
        for rn in range(self.nrecoveries):
            self.recoveries.append(Recovery("load", "load",
                                            "load", "load", ""))
            self._loadRecoveryMetaFile(taskDir, rn)
            self.recoveries[rn].setOwnerTask(self)

    def _save(self, storPath):
        origTaskDir = os.path.join(storPath, self.id)
        if not getProcPool().os.path.exists(origTaskDir):
            raise se.TaskDirError("_save: no such task dir '%s'" % origTaskDir)
        taskDir = os.path.join(storPath, self.id + TEMP_EXT)
        self.log.debug("_save: orig %s temp %s", origTaskDir, taskDir)
        if getProcPool().os.path.exists(taskDir):
            getProcPool().fileUtils.cleanupdir(taskDir)
        getProcPool().os.mkdir(taskDir)
        try:
            self.njobs = len(self.jobs)
            self.nrecoveries = len(self.recoveries)
            self._saveTaskMetaFile(taskDir)
            if self.state == State.finished:
                self._saveTaskResultMetaFile(taskDir)
            for jn in range(self.njobs):
                self._saveJobMetaFile(taskDir, jn)
            for rn in range(self.nrecoveries):
                self._saveRecoveryMetaFile(taskDir, rn)
        except Exception as e:
            self.log.error("Unexpected error", exc_info=True)
            try:
                getProcPool().fileUtils.cleanupdir(taskDir)
            except:
                self.log.warning("can't remove temp taskdir %s" % taskDir)
            raise se.TaskPersistError("%s persist failed: %s" % (self, e))
        # Make sure backup dir doesn't exist
        getProcPool().fileUtils.cleanupdir(origTaskDir + BACKUP_EXT)
        getProcPool().os.rename(origTaskDir, origTaskDir + BACKUP_EXT)
        getProcPool().os.rename(taskDir, origTaskDir)
        getProcPool().fileUtils.cleanupdir(origTaskDir + BACKUP_EXT)
        getProcPool().fileUtils.fsyncPath(origTaskDir)

    def _clean(self, storPath):
        taskDir = os.path.join(storPath, self.id)
        getProcPool().fileUtils.cleanupdir(taskDir)

    def _recoverDone(self):
        # protect against races with stop/abort
        self.log.debug("Recover Done: state %s", self.state)
        while True:
            try:
                if self.state == State.recovering:
                    self._updateState(State.recovered)
                elif self.state == State.raborting:
                    self._updateState(State.failed)
                return
            except se.TaskStateTransitionError:
                self.log.error("Unexpected error", exc_info=True)

    def _recover(self):
        self.log.debug("_recover")
        if not self.state == State.recovering:
            raise se.TaskStateError("%s: _recover in state %s" %
                                    (self, self.state))
        try:
            while self.state == State.recovering:
                rec = self.popRecovery()
                self.log.debug("running recovery %s", rec)
                if not rec:
                    break
                self._run(rec.run)
        except Exception as e:
            self.log.warning("task %s: recovery failed: %s",
                             self, e, exc_info=True)
            # protect against races with stop/abort
            try:
                if self.state == State.recovering:
                    self._updateState(State.raborting)
            except se.TaskStateTransitionError:
                pass
        self._recoverDone()

    def resourceAcquired(self, namespace, resource, locktype):
        # Callback from resourceManager.Owner. May be called by another thread.
        self._incref()
        try:
            self.callbackLock.acquire()
            try:
                self.log.debug("_resourcesAcquired: %s.%s (%s)",
                               namespace, resource, locktype)
                if self.state == State.preparing:
                    return
                if self.state == State.acquiring:
                    self._updateState(State.acquiring)
                elif self.state == State.racquiring:
                    self._updateState(State.racquiring)
                elif self.state == State.blocked:
                    self._updateState(State.preparing)
                elif (self.state == State.aborting or
                      self.state == State.raborting):
                    self.log.debug("resource %s.%s acquired while in state %s",
                                   namespace, resource, self.state)
                else:
                    raise se.TaskStateError("acquire is not allowed in state"
                                            " %s" % self.state)
            finally:
                self.callbackLock.release()
        finally:
            self._decref()

    def resourceRegistered(self, namespace, resource, locktype):
        self._incref()
        try:
            self.callbackLock.acquire()
            try:
                # Callback from resourceManager.Owner. May be called
                # by another thread.
                self.log.debug("_resourcesAcquired: %s.%s (%s)",
                               namespace, resource, locktype)
                # Protect against races with stop/abort
                if self.state == State.preparing:
                    self._updateState(State.blocked)
            finally:
                self.callbackLock.release()
        finally:
            self._decref()

    def _setError(self, e=se.TaskAborted("Unknown error encountered"),
                  expected=False):
        if not expected:
            self.log.exception("Unexpected error")
        self.error = e

    def _run(self, fn, *args, **kargs):
        code = 100
        message = "Unknown Error"
        try:
            return fn(*args, **kargs)
        except se.StorageException as e:
            code = e.code
            message = str(e)
            self._setError(e, e.expected)
        except Exception as e:
            message = six.text_type(e)
            self._setError(e)
        except:
            self._setError()

        self.log.debug("Task._run: %s %s %s failed - stopping task",
                       self, args, kargs)
        self.stop()
        raise se.TaskAborted(message, code)

    def _runJobs(self):
        result = ""
        code = 100
        message = "Unknown Error"
        i = 0
        j = None
        try:
            if self.aborting():
                raise se.TaskAborted("shutting down")
            if not self.state == State.running:
                raise se.TaskStateError("%s: can't run Jobs in state %s" %
                                        (self, self.state))
            # for now: result is the last job result, jobs are run sequentially
            for j in self.jobs:
                if self.aborting():
                    raise se.TaskAborted("shutting down")
                self.log.debug("Task.run: running job %s: %s" % (i, j))
                self._updateResult(
                    0, 'running job {0} of {1}'.format(i + 1, len(self.jobs)),
                    '')
                result = self._run(j.run)
                if self.aborting():
                    raise se.TaskAborted("shutting down")
                if result is None:
                    result = ""
                i += 1
            j = None
            self._updateResult(0, "%s jobs completed successfully" % i, result)
            self._updateState(State.finished)
            self.log.debug('Task.run: exit - success: result %s' % result)
            return result
        except se.TaskAborted as e:
            self.log.debug("aborting: %s", e)
            message = e.value
            code = e.abortedcode
            if not self.aborting():
                self.log.error("Aborted exception but not in aborting state")
                raise
        self._updateResult(code, message, "")

    def _doAbort(self, force=False):
        self.log.debug("Task._doAbort: force %s" % force)
        self.lock.acquire()
        # Am I really the last?
        if self.ref != 0:
            self.lock.release()
            return
        self.ref += 1
        self.lock.release()
        try:
            try:
                if (not self.state.canAbort() and
                        (force and not self.state.canAbortRecovery())):
                    self.log.warning("Task._doAbort %s: ignoring - "
                                     "at state %s", self, self.state)
                    return
                self.resOwner.cancelAll()
                if self.state.canAbort():
                    self._updateState(State.aborting)
                else:
                    self._updateState(State.raborting)
            except se.TaskAborted:
                self._updateState(State.failed)
        finally:
            self.lock.acquire()
            self.ref -= 1
            self.lock.release()
            # If something horrible went wrong, just fail the task.
            if not self.state.isDone():
                self.log.warning("Task exited in non-terminal state. "
                                 "Setting task as failed.")
                self._updateState(State.failed)

    def _doRecover(self):
        self.lock.acquire()
        # Am I really the last?
        if self.ref != 0:
            self.lock.release()
            raise se.TaskHasRefs(six.text_type(self))
        self.ref += 1
        self.lock.release()
        try:
            self._updateState(State.racquiring)
        finally:
            self.lock.acquire()
            self.ref -= 1
            self.lock.release()

    def _incref(self, force=False):
        self.lock.acquire()
        try:
            if self.aborting() and (self._forceAbort or not force):
                raise se.TaskAborted(six.text_type(self))

            self.ref += 1
            ref = self.ref
            return ref
        finally:
            self.lock.release()

    def _decref(self, force=False):
        self.lock.acquire()
        self.ref -= 1
        ref = self.ref
        self.lock.release()

        self.log.debug("ref %d aborting %s", ref, self.aborting())
        if ref == 0 and self.aborting():
            self._doAbort(force)
        return ref

    ##########################################################################
    # Public Interface                                                       #
    ##########################################################################

    def setDefaultException(self, exceptionObj):
        # defaultException must have response method
        if exceptionObj and not hasattr(exceptionObj, "response"):
            raise se.InvalidDefaultExceptionException(
                six.text_type(exceptionObj))
        self.defaultException = exceptionObj

    def setTag(self, tag):
        if KEY_SEPARATOR in tag:
            raise ValueError("tag cannot include %s character" % KEY_SEPARATOR)
        self.tag = six.text_type(tag)

    def isDone(self):
        return self.state.isDone()

    def addJob(self, job):
        """
        Add async job to the task. Assumes all resources are acquired
        or registered.
        """
        if not self.mng:
            raise se.UnmanagedTask(six.text_type(self))
        if not isinstance(job, Job):
            raise TypeError("Job param %s(%s) must be Job object" %
                            (repr(job), type(job)))
        if self.state != State.preparing:
            raise Exception("Task.addJob: can't add job in non preparing state"
                            " (%s)" % self.state)
        if not job.name:
            raise ValueError("Task.addJob: name is required")
        name = job.name
        for j in self.jobs:
            if name == j.name:
                raise ValueError("addJob: name '%s' must be unique" % (name))
        job.setOwnerTask(self)
        self.jobs.append(job)
        self.njobs = len(self.jobs)
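
    # Usage sketch (illustrative, not from the original source): jobs can only
    # be added while the task is in State.preparing and after a manager has
    # been set, typically from inside the function passed to prepare():
    #
    #     def prepareFunc(task):                       # hypothetical helper
    #         task.addJob(Job("copy", copyFn, *args))  # Job args are assumed
    #
    # Job names must be unique within a task; the jobs are later executed
    # sequentially by _runJobs() once the task reaches State.running.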

    def clean(self):
        if not self.store:
            return
        if not self.isDone():
            raise se.TaskStateError("can't clean in state %s" % self.state)
        self._clean(self.store)

    def pushRecovery(self, recovery):
        """
        Add recovery "job" to the task. Recoveries are committed in FILO order.
        Assumes that all required resources are acquired or registered.
        """
        if not isinstance(recovery, Recovery):
            raise TypeError("recovery param %s(%s) must be Recovery object" %
                            (repr(recovery), type(recovery)))
        if not recovery.name:
            raise ValueError("pushRecovery: name is required")
        name = recovery.name
        for r in self.recoveries:
            if name == r.name:
                raise ValueError("pushRecovery: name '%s' must be unique" %
                                 (name))
        recovery.setOwnerTask(self)
        self.recoveries.append(recovery)
        self.persist()
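
    # Illustrative sketch: recoveries form an undo stack, so pushing them in
    # execution order means the most recent change is undone first (FILO).
    # The argument values below are hypothetical; the Recovery signature
    # appears to be (name, module, object, function, params), judging from
    # the stub used in replaceRecoveries() below.
    #
    #     task.pushRecovery(Recovery("removeVolume", "volume", "Volume",
    #                                "delete", [volUUID]))
    #     task.pushRecovery(Recovery("revertMeta", "volume", "Volume",
    #                                "revertMetadata", [volUUID]))
    #     # on rollback, "revertMeta" would be popped and run before
    #     # "removeVolume"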

    def replaceRecoveries(self, recovery):
        if not isinstance(recovery, Recovery):
            raise TypeError("recovery param %s(%s) must be Recovery object" %
                            (repr(recovery), type(recovery)))
        if not recovery.name:
            raise ValueError("replaceRecoveries: name is required")
        recovery.setOwnerTask(self)
        # Prime the loop with a placeholder, then pop recoveries until the
        # rollback sentinel is reached (or the stack is exhausted) before
        # appending the replacement recovery.
        rec = Recovery('stubName', 'stubMod', 'stubObj', 'stubFunc', [])
        while rec and rec.name != ROLLBACK_SENTINEL:
            rec = self.popRecovery()
        self.recoveries.append(recovery)
        self.persist()

    def popRecovery(self):
        if self.recoveries:
            return self.recoveries.pop()

    def clearRecoveries(self):
        self.recoveries = []
        self.persist()

    def setManager(self, manager):
        # If need be, refactor out to "validateManager" method
        if not hasattr(manager, "queue"):
            raise se.InvalidTaskMng(six.text_type(manager))
        self.mng = manager

    def setCleanPolicy(self, clean):
        self.cleanPolicy = TaskCleanType(clean)

    def setPersistence(self, store,
                       persistPolicy=TaskPersistType.auto,
                       cleanPolicy=TaskCleanType.auto):
        self.persistPolicy = TaskPersistType(persistPolicy)
        self.store = store
        self.setCleanPolicy(cleanPolicy)
        if self.persistPolicy != TaskPersistType.none and not self.store:
            raise se.TaskPersistError("no store defined")
        taskDir = os.path.join(self.store, self.id)
        try:
            getProcPool().fileUtils.createdir(taskDir)
        except Exception as e:
            self.log.error("Unexpected error", exc_info=True)
            raise se.TaskPersistError("%s: cannot access/create taskdir"
                                      " %s: %s" % (self, taskDir, e))
        if (self.persistPolicy == TaskPersistType.auto and
                self.state != State.init):
            self.persist()
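
    # Sketch of intended use (the path below is illustrative): "store" is a
    # directory under which a per-task subdirectory <store>/<task.id> is
    # created. With TaskPersistType.auto the task is also persisted right
    # away unless it is still in State.init:
    #
    #     task.setPersistence("/run/vdsm/tasks")   # hypothetical store path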

    def setRecoveryPolicy(self, clean):
        self.recoveryPolicy = TaskRecoveryType(clean)

    def rollback(self):
        self.log.debug('(rollback): enter')
        if self.recoveryPolicy == TaskRecoveryType.none:
            self.log.debug("rollback is skipped")
            return
        if not self.isDone():
            raise se.TaskNotFinished("can't rollback in state %s" % self.state)
        self._doRecover()
        self.log.debug('(rollback): exit')

    def persist(self):
        if self.persistPolicy == TaskPersistType.none:
            return
        if not self.store:
            raise se.TaskPersistError("no store defined")
        if self.state == State.init:
            raise se.TaskStateError("can't persist in state %s" % self.state)
        self._save(self.store)

    @classmethod
    def loadTask(cls, store, taskid):
        t = Task(taskid)
        if getProcPool().os.path.exists(os.path.join(store, taskid)):
            ext = ""
        # TBD: is this the correct order (temp < backup) + should temp
        # be considered at all?
        elif getProcPool().os.path.exists(os.path.join(store,
                                                       taskid + TEMP_EXT)):
            ext = TEMP_EXT
        elif getProcPool().os.path.exists(os.path.join(store,
                                                       taskid + BACKUP_EXT)):
            ext = BACKUP_EXT
        else:
            raise se.TaskDirError("loadTask: no such task dir '%s/%s'" %
                                  (store, taskid))
        t._load(store, ext)
        return t
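
    # Note (sketch): loadTask() locates the task directory by trying the
    # plain name first, then the TEMP_EXT copy, then the BACKUP_EXT copy
    # (see the TBD comment above about whether that order is right), and
    # raises TaskDirError if none exist. The returned task still has to be
    # recovered explicitly; see the sketch after recover() below.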

    @threadlocal_task
    def prepare(self, func, *args, **kwargs):
        message = self.error
        try:
            self._incref()
        except se.TaskAborted:
            self._doAbort()
            return
        try:
            self._updateState(State.preparing)
            result = None
            code = 0
            try:
                if func:
                    result = self._run(func, *args, **kwargs)
            except se.TaskAborted as e:
                self.log.info("aborting: %s", e)
                code = e.abortedcode
                message = e.value

            if self.aborting():
                self.log.debug("Prepare: aborted: %s", message)
                self._updateResult(code, "Task prepare failed: %s" %
                                   (message,), "")
                raise self.error

            if self.jobs:
                self.log.debug("Prepare: %s jobs exist, move to acquiring",
                               self.njobs)
                self._updateState(State.acquiring)
                if self.aborting():
                    self.log.error('failed to acquire task %s', self.id)
                    raise self.error
                self.log.debug("returning")
                return dict(uuid=str(self.id))

            self.log.debug("finished: %s", result)
            self._updateResult(0, "OK", result)
            self._updateState(State.finished)
            return result
        finally:
            self._decref()
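
    # Typical flow (sketch, not normative): prepare() runs the synchronous
    # part of a verb. If jobs were added it leaves the task in
    # State.acquiring and returns the task uuid so the caller can poll;
    # otherwise it records the result and finishes immediately. A later
    # commit() then moves a job-carrying task to State.running:
    #
    #     task.prepare(prepareFunc, task)   # prepareFunc is hypothetical
    #     task.commit()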

    @threadlocal_task
    def commit(self, args=None):
        self.log.debug("committing task: %s", self.id)
        try:
            self._incref()
        except se.TaskAborted:
            self._doAbort()
            return
        try:
            self._updateState(State.running)
        finally:
            self._decref()

    @contextmanager
    def abort_callback(self, callback):
        with self._abort_lock:
            if self.aborting():
                aborting = True
            else:
                aborting = False
                self._abort_callbacks.add(callback)

        if aborting:
            callback()

        try:
            yield
        finally:
            with self._abort_lock:
                self._abort_callbacks.discard(callback)
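
    # Usage sketch: abort_callback() lets a long-running step react promptly
    # when the task is aborted, e.g. by killing a child process (proc is a
    # hypothetical object with kill()/wait()):
    #
    #     with task.abort_callback(proc.kill):
    #         proc.wait()
    #
    # If the task is already aborting when the context is entered the
    # callback runs immediately; it is always discarded on exit.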

    def _execute_abort_callbacks(self):
        with self._abort_lock:
            self._aborting = True
            abort_callbacks = list(self._abort_callbacks)

        for callback in abort_callbacks:
            try:
                callback()
            except Exception:
                self.log.exception('failure running abort callback')

    def aborting(self):
        return (self._aborting or
                self.state == State.aborting or
                self.state == State.raborting)

    def stop(self, force=False):
        self.log.debug("stopping in state %s (force %s)", self.state, force)
        self._incref(force)
        try:
            if self.state.isDone():
                self.log.debug("Task already stopped (%s), ignoring",
                               self.state)
                return
            elif (self.state.isRecovering() and
                  not force and
                  (self.cleanPolicy == TaskCleanType.auto)):
                self.log.debug("Task (%s) in recovery and force is false, "
                               "ignoring", self.state)
                return

            self._execute_abort_callbacks()
            self._forceAbort = force
        finally:
            self._decref(force)

    @threadlocal_task
    def recover(self, args=None):
        ''' Do not call this function while the task is actually running.
            This method should only be used to recover the task state after
            a (vdsmd) restart.
        '''
        self.log.debug('(recover): recovering: state %s', self.state)
        try:
            self._incref(force=True)
        except se.TaskAborted:
            self._doAbort(True)
            return
        try:
            if self.isDone():
                self.log.debug('(recover): task is done: state %s', self.state)
                return
            # if we are not recovering, just abort
            if self.state.canAbort():
                self.stop()
            # if we waited for recovery - keep waiting
            elif self.state == State.waitrecover:
                pass
            # if we started the recovery - restart it
            elif (self.state == State.racquiring or
                  self.state == State.recovering):
                self._updateState(State.racquiring, force=True)
            # else we were in a failed recovery - abort it
            else:
                self.stop(force=True)
        finally:
            self._decref(force=True)
        self.log.debug('(recover): recovered: state %s', self.state)
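
    # Post-restart sketch (names are illustrative): a task manager would
    # typically reload persisted tasks and recover them roughly like this:
    #
    #     for taskid in listPersistedTasks(storeDir):   # hypothetical helper
    #         t = Task.loadTask(storeDir, taskid)
    #         t.setManager(taskManager)
    #         t.recover()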

    def getState(self):
        return str(self.state)

    def getInfo(self):
        return dict(id=self.id, verb=self.name)

    def deprecated_getStatus(self):
        oReturn = {}
        oReturn["taskID"] = self.id
        oReturn["taskState"] = self.state.DEPRECATED_STATE[self.state.state]
        oReturn["taskResult"] = self.state.DEPRECATED_RESULT[self.state.state]
        oReturn["code"] = self.result.code
        oReturn["message"] = self.result.message
        return oReturn

    def getStatus(self):
        oReturn = {}
        oReturn["state"] = {'code': self.result.code,
                            'message': self.result.message}
        oReturn["task"] = {'id': self.id, 'state': str(self.state)}
        oReturn["result"] = self.result.result
        return oReturn

    def getDetails(self):
        return {
            "id": self.id,
            "verb": self.name,
            "state": str(self.state),
            "code": self.result.code,
            "message": self.result.message,
            "result": self.result.result,
            "tag": self.tag
        }

    def getID(self):
        return self.id

    def getTags(self):
        return self.tag

    def __str__(self):
        return str(self.id)

    # FIXME: Use six.StringIO and enumerate()
    # TODO: Or six.BytesIO?
    def dumpTask(self):
        s = "Task: %s" % self._dump(self, Task.fields)
        i = 0
        for r in self.recoveries:
            s += " Recovery%d: %s" % (i, self._dump(r, Recovery.fields))
            i += 1
        i = 0
        for j in self.jobs:
            s += " Job%d: %s" % (i, self._dump(j, Job.fields))
            i += 1
        return s

    def getExclusiveLock(
        self,
        namespace,
        resName,
        timeout=config.getint('irs',
                              'task_resource_default_timeout')):
        self.resOwner.acquire(namespace,
                              resName,
                              resourceManager.EXCLUSIVE,
                              timeout)

    def getSharedLock(self,
                      namespace,
                      resName,
                      timeout=config.getint('irs',
                                            'task_resource_default_timeout')):
        self.resOwner.acquire(namespace,
                              resName,
                              resourceManager.SHARED,
                              timeout)
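
    # Sketch: these helpers take resource locks on behalf of the task, e.g.
    #
    #     task.getSharedLock(STORAGE, sdUUID)              # namespace/name
    #     task.getExclusiveLock(IMAGE_NAMESPACE, imgUUID)  # are illustrative
    #
    # The default timeout comes from the irs/task_resource_default_timeout
    # config value; on abort the owner's pending requests are canceled via
    # resOwner.cancelAll() (see _doAbort() above).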
Example #4
0
class Request(object):
    """
    Internal request object, don't use directly
    """
    _log = logging.getLogger("storage.ResourceManager.Request")
    namespace = property(lambda self: self._namespace)
    name = property(lambda self: self._name)
    fullName = property(lambda self: "%s.%s" % (self._namespace, self._name))
    lockType = property(lambda self: self._lockType)
    syncRoot = property(lambda self: self._syncRoot)

    def __init__(self, namespace, name, lockType, callback):
        self._syncRoot = threading.RLock()
        self._namespace = namespace
        self._name = name
        self._lockType = lockType
        self._isActive = True
        self._isCanceled = False
        self._doneEvent = threading.Event()
        self._callback = callback
        self.reqID = str(uuid4())
        self._log = SimpleLogAdapter(self._log, {"ResName": self.fullName,
                                                 "ReqID": self.reqID})

    def cancel(self):
        with self._syncRoot:
            if not self._isActive:
                self._log.warn("Tried to cancel a processed request")
                raise RequestAlreadyProcessedError("Cannot cancel a processed "
                                                   "request")

            self._isActive = False
            self._isCanceled = True

            self._log.debug("Canceled request")
            try:
                self._callback(RequestRef(self), None)
            except Exception:
                self._log.warn("Request callback threw an exception",
                               exc_info=True)
            self._callback = None
            self._doneEvent.set()

    def _status(self):
        with self._syncRoot:
            if self._isCanceled:
                return "canceled"
            if self._doneEvent.isSet():
                return "granted"
            return "waiting"

    def canceled(self):
        return self._isCanceled

    def grant(self):
        with self._syncRoot:
            if not self._isActive:
                self._log.warn("Tried to grant a processed request")
                raise RequestAlreadyProcessedError("Cannot grant a processed "
                                                   "request")

            self._isActive = False
            self._log.debug("Granted request")
            self._doneEvent.set()

    def emit(self, resource):
        try:
            ref = RequestRef(self)
            self._callback(ref, resource)
        except Exception:
            self._log.warn("Request callback threw an exception",
                           exc_info=True)

    def wait(self, timeout=None):
        return self._doneEvent.wait(timeout)

    def granted(self):
        with self._syncRoot:
            return (not self._isCanceled) and self._doneEvent.isSet()

    def __str__(self):
        return "Request for %s - %s: %s" % (self.fullName, self.lockType,
                                            self._status())