def run_next(self):
    while True:
        runnable_jobs = self.job_scheduler._job_collection.ready_jobs

        log.info(
            "run_next: %d runnable jobs of (%d pending, %d tasked)" % (
                len(runnable_jobs),
                len(self.job_scheduler._job_collection.pending_jobs),
                len(self.job_scheduler._job_collection.tasked_jobs),
            ))

        if not runnable_jobs:
            break

        # Re-check dependencies for this round with a fresh DepCache, then
        # task and spawn the jobs that pass and cancel the ones that don't.
        dep_cache = DepCache()
        ok_jobs, cancel_jobs = self.job_scheduler._check_jobs(
            runnable_jobs, dep_cache)
        self.job_scheduler._job_collection.update_many(ok_jobs, "tasked")
        for job in cancel_jobs:
            self.job_scheduler._complete_job(job, False, True)
        for job in ok_jobs:
            self.job_scheduler._spawn_job(job)

        self.drain_progress(skip_advance=True)
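
The loop above is a generic drain pattern: pull the ready set, validate it against current dependencies, run the survivors, and stop when a pass finds nothing runnable. A minimal standalone sketch of the same pattern (the collection and the three callbacks are hypothetical stand-ins, not the scheduler's real types):

def drain_ready(collection, check, spawn, cancel):
    """Run everything runnable, round by round, until nothing is left."""
    while True:
        batch = list(collection.ready_jobs)
        if not batch:
            break
        ok, cancelled = check(batch)   # split into runnable vs. doomed
        for job in cancelled:
            cancel(job)
        for job in ok:
            spawn(job)
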
class CommandPlan(object):
    """This class is responsible for translating requests to run jobs or
    to change the state of the system into Command objects with associated
    Jobs.

    """
    def __init__(self, lock_cache, job_collection):
        self._dep_cache = DepCache()
        self._lock_cache = lock_cache
        self._job_collection = job_collection

    def get_expected_state(self, stateful_object_instance):
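        # expected_states is (re)built by _set_state and
        # get_transition_consequences from pending write locks; if no queued
        # job will change this object, fall back to its committed state.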
        try:
            return self.expected_states[stateful_object_instance]
        except KeyError:
            return stateful_object_instance.state

    def _create_locks(self, job):
        """Create StateLock instances based on a Job's dependencies, and
        add in any extras the job returns from Job.create_locks

        """
        locks = []
        # Take a read lock on everything the job's dependencies
        # (self._dep_cache.get(job)) refer to
        for dependency in self._dep_cache.get(job).all():
            locks.append(
                StateLock(job=job,
                          locked_item=dependency.stateful_object,
                          write=False))

        if isinstance(job, StateChangeJob):
            stateful_object = job.get_stateful_object()

            # If this is a StateChangeJob, also take read locks on the
            # dependencies of both the old and the new state of the stateful
            # object: e.g. if we are taking a mount from unmounted->mounted
            # then we need to lock the new state's requirement of lnet_up, whereas
            # if we're going from mounted->unmounted we need to lock the old state's
            # requirement of lnet_up (to prevent someone stopping lnet while
            # we're still running)
            from itertools import chain

            for d in chain(
                    self._dep_cache.get(stateful_object, job.old_state).all(),
                    self._dep_cache.get(stateful_object,
                                        job.state_transition.new_state).all(),
            ):
                locks.append(
                    StateLock(job=job,
                              locked_item=d.stateful_object,
                              write=False))

            # Take a write lock on get_stateful_object if this is a StateChangeJob
            locks.append(
                StateLock(
                    job=job,
                    locked_item=stateful_object,
                    begin_state=job.old_state,
                    end_state=job.state_transition.new_state,
                    write=True,
                ))

        locks.extend(job.create_locks())

        return locks

    def add_jobs(self, jobs, command):
        """Add a job, and any others which are required in order to reach its prerequisite state"""
        # Important: the Job must not be committed until all
        # its dependencies and locks are in.
        assert not transaction.get_autocommit()

        for job in jobs:
            for dependency in self._dep_cache.get(job).all():
                if not dependency.satisfied():
                    log.info("add_jobs: setting required dependency %s %s" %
                             (dependency.stateful_object,
                              dependency.preferred_state))
                    self._set_state(dependency.get_stateful_object(),
                                    dependency.preferred_state, command)
            log.info("add_jobs: done checking dependencies")
            locks = self._create_locks(job)
            job.locks_json = json.dumps([l.to_dict() for l in locks])
            self._create_dependencies(job, locks)
            job.save()

            log.info("add_jobs: created Job %s (%s)" %
                     (job.pk, job.description()))

            for l in locks:
                self._lock_cache.add(l)

            command.jobs.add(job)

        self._job_collection.add_command(command, jobs)

    def get_transition_consequences(self, instance, new_state):
        """For use in the UI, for warning the user when an
           action is going to have some consequences which
           affect an object other than the one they are operating
           on directly.  Because this is UI rather than business
           logic, we take some shortcuts here:
            * Don't calculate expected_states, i.e. ignore running
              jobs and generate output based on the actual committed
              states of objects
            * Don't bother sorting for execution order - output an
              unordered list.
        """
        from chroma_core.models import StatefulObject

        assert isinstance(instance, StatefulObject)

        self.expected_states = {}
        self.deps = set()
        self.edges = set()
        self._emit_transition_deps(
            Transition(instance, self.get_expected_state(instance), new_state))

        log.debug("Transition %s %s->%s:" %
                  (instance, self.get_expected_state(instance), new_state))
        for d in self.deps:
            log.debug("  dep %s" % (d, ))
        for e in self.edges:
            log.debug("  edge [%s]->[%s]" % e)
        self.deps = self._sort_graph(self.deps, self.edges)

        depended_jobs = []
        transition_job = None
        for d in self.deps:
            job = d.to_job()
            if isinstance(job, StateChangeJob):
                so = getattr(job, job.stateful_object)
                stateful_object_id = so.pk
                stateful_object_content_type_id = ContentType.objects.get_for_model(
                    so).pk
            else:
                stateful_object_id = None
                stateful_object_content_type_id = None

            description = {
                "class": job.__class__.__name__,
                "requires_confirmation": job.get_requires_confirmation(),
                "confirmation_prompt": job.get_confirmation_string(),
                "description": job.description(),
                "stateful_object_id": stateful_object_id,
                "stateful_object_content_type_id":
                stateful_object_content_type_id,
            }

            if d == self.deps[-1]:
                transition_job = description
            else:
                depended_jobs.append(description)

        return {
            "transition_job": transition_job,
            "dependency_jobs": depended_jobs
        }

    def _create_dependencies(self, job, locks):
        """Examine overlaps between a job's locks and those of
           earlier jobs which are still pending, and generate wait_for
           dependencies when we have a write lock and they have a read lock
           or generate depend_on dependencies when we have a read or write lock and
           they have a write lock"""
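        # Worked example (hypothetical jobs): if job C takes a write lock on
        # item T, job A holds the latest earlier write lock on T, and jobs
        # B1/B2 read T after A, then C wait_for's A, B1 and B2; anything
        # before A is already ordered transitively behind A's own wait_fors.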
        wait_fors = set()
        for lock in locks:
            if lock.write:
                wl = lock
                # Depend on the most recent pending write to this stateful object,
                # trust that it will have depended on any before that.
                prior_write_lock = self._lock_cache.get_latest_write(
                    wl.locked_item, not_job=job)
                if prior_write_lock:
                    if wl.begin_state and prior_write_lock.end_state:
                        assert wl.begin_state == prior_write_lock.end_state, (
                            "%s locks %s in state %s but previous %s leaves it in state %s"
                            %
                            (job, wl.locked_item, wl.begin_state,
                             prior_write_lock.job, prior_write_lock.end_state))
                    wait_fors.add(prior_write_lock.job.id)
                    # We will only wait_for read locks after this write lock, as it
                    # will have wait_for'd any before it.
                    read_barrier_id = prior_write_lock.job.id
                else:
                    read_barrier_id = 0

                # Wait for any reads of the stateful object between the last write and
                # our position.
                prior_read_locks = self._lock_cache.get_read_locks(
                    wl.locked_item, after=read_barrier_id, not_job=job)
                for i in prior_read_locks:
                    wait_fors.add(i.job.id)
            else:
                rl = lock
                prior_write_lock = self._lock_cache.get_latest_write(
                    rl.locked_item, not_job=job)
                if prior_write_lock:
                    # See comment by locked_state in StateReadLock
                    wait_fors.add(prior_write_lock.job.id)

        wait_fors = list(wait_fors)
        job.wait_for_json = json.dumps(wait_fors)

    def _sort_graph(self, objects, edges):
        """Sort items in a graph by their longest path from a leaf.  Items
           at the start of the result are the leaves.  Roots come last."""
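        # Example: objects {a, b, c} with edges {(a, b), (b, c)} (a depends
        # on b, which depends on leaf c) get leaf distances c=0, b=1, a=2,
        # so the returned order is [c, b, a]: the leaf first, the root last.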
        object_edges = defaultdict(list)
        for e in edges:
            parent, child = e
            object_edges[parent].append(child)

        leaf_distance_cache = {}

        def leaf_distance(obj, depth=0, hops=0):
            if obj in leaf_distance_cache:
                return leaf_distance_cache[obj] + hops

            depth = depth + 1
            max_child_hops = hops
            for child in object_edges[obj]:
                child_hops = leaf_distance(child, depth, hops + 1)
                max_child_hops = max(child_hops, max_child_hops)

            leaf_distance_cache[obj] = max_child_hops - hops

            return max_child_hops

        object_leaf_distances = []
        for o in objects:
            object_leaf_distances.append((o, leaf_distance(o)))

        object_leaf_distances.sort(key=lambda pair: pair[1])
        return [obj for obj, ld in object_leaf_distances]

    def _set_state(self, instance, new_state, command):
        """Return a Job or None if the object is already in new_state.
        command_id should refer to a command instance or be None."""

        log.info("set_state: %s-%s transitioning from %s to %s" %
                 (instance.__class__, instance.id, instance.state, new_state))

        from chroma_core.models import StatefulObject

        assert isinstance(instance, StatefulObject)

        # Get the computed list of valid transition states away from the
        # current state.
        try:
            available_states = instance.downcast().get_available_states(
                instance.state)
        except AttributeError:
            available_states = instance.get_available_states(instance.state)

        # Append the current state as a valid transition state; there is
        # specific code to deal with that scenario later.
        available_states += [instance.state]

        if new_state not in available_states:
            raise SchedulingError(
                "State '%s' is invalid for %s, must be one of %s" %
                (new_state, instance.__class__, available_states))

        # Work out the eventual states (and which writelock'ing job to depend on to
        # ensure that state) from all non-'complete' jobs in the queue
        # It is possible that some locks have no end state, so these are excluded.
        item_to_lock = self._lock_cache.get_write_by_locked_item()
        self.expected_states = dict([
            (item, state_lock.end_state)
            for item, state_lock in item_to_lock.items()
            if state_lock.end_state
        ])

        if new_state == self.get_expected_state(instance):
            log.info("set_state: already expected to be in state %s" %
                     new_state)
            if instance.state != new_state:
                # The state will be reached by an in-progress Job: pick out
                # the job that makes it so and attach that to the Command
                # instead of scheduling anything new.
                job = self._lock_cache.get_latest_write(instance).job
                log.info("set_state: state %s to be reached by job %s" %
                         (new_state, job.id))
                command.jobs.add(job)
                self._job_collection.add_command(command, [job])

            return None

        self.deps = set()
        self.edges = set()
        self._emit_transition_deps(
            Transition(instance, self.get_expected_state(instance), new_state))

        # This sort is done to make the following true:
        #  The order of the rows in the Job table corresponds to the order in which
        #  the jobs would run (including accounting for dependencies) in the absence
        #  of parallelism.
        self.deps = self._sort_graph(self.deps, self.edges)

        jobs = []
        for d in self.deps:
            # Create and save the Job instance
            job = d.to_job()
            locks = self._create_locks(job)
            job.locks_json = json.dumps([l.to_dict() for l in locks])
            self._create_dependencies(job, locks)
            job.save()
            jobs.append(job)
            for l in locks:
                self._lock_cache.add(l)
            log.debug("  dep %s -> Job %s" % (d, job.pk))
            command.jobs.add(job)

        command.save()
        self._job_collection.add_command(command, jobs)

    def _emit_transition_deps(self, transition, transition_stack={}):
        if transition in self.deps:
            log.debug("emit_transition_deps: %s already scheduled" %
                      (transition, ))
            return transition

        log.debug("emit_transition_deps: %s" % (transition, ))

        # Update our worldview to record that any subsequent dependencies may
        # assume that we are in our new state
        transition_stack = dict(transition_stack.items())
        transition_stack[transition.stateful_object] = transition.new_state
        log.debug("Updating transition_stack[%s/%s] = %s" %
                  (transition.stateful_object.__class__,
                   transition.stateful_object.id, transition.new_state))

        # do nothing for a NOOP transition
        if transition.old_state == transition.new_state:
            log.debug("NOOP transition %s -> %s" %
                      (transition.old_state, transition.new_state))
            return None

        # E.g. for 'unformatted'->'registered' for a ManagedTarget we
        # would get ['unformatted', 'formatted', 'registered']
        route = transition.stateful_object.get_route(transition.old_state,
                                                     transition.new_state)
        log.debug("emit_transition_deps: route %s" % (route, ))

        # Add to self.deps and self.edges for each step in the route
        prev = None
        for i in range(0, len(route) - 1):
            dep_transition = Transition(transition.stateful_object, route[i],
                                        route[i + 1])
            self.deps.add(dep_transition)
            self._collect_dependencies(dep_transition, transition_stack)
            if prev:
                self.edges.add((dep_transition, prev))
            prev = dep_transition

        return prev

    def _collect_dependencies(self, root_transition, transition_stack):
        if not hasattr(self, "cdc"):
            self.cdc = defaultdict(list)
        if root_transition in self.cdc:
            return

        log.debug("collect_dependencies: %s" % root_transition)
        # What is explicitly required for this state transition?
        transition_deps = self._dep_cache.get(root_transition.to_job())
        for dependency in transition_deps.all():
            from chroma_core.lib.job import DependOn

            assert isinstance(dependency, DependOn)
            old_state = self.get_expected_state(dependency.stateful_object)
            log.debug("cd %s/%s %s %s %s" % (
                dependency.stateful_object.__class__,
                dependency.stateful_object.id,
                old_state,
                dependency.acceptable_states,
                id(dependency.stateful_object),
            ))

            if old_state not in dependency.acceptable_states:
                dep_transition = self._emit_transition_deps(
                    Transition(dependency.stateful_object, old_state,
                               dependency.preferred_state), transition_stack)
                self.edges.add((root_transition, dep_transition))

        def get_mid_transition_expected_state(obj):
            try:
                return transition_stack[obj]
            except KeyError:
                return self.get_expected_state(obj)

        # What will statically be required in our new state?
        stateful_deps = self._dep_cache.get(root_transition.stateful_object,
                                            root_transition.new_state)
        for dependency in stateful_deps.all():
            if dependency.stateful_object in transition_stack:
                continue
            # When we start running it will be in old_state
            old_state = get_mid_transition_expected_state(
                dependency.stateful_object)

            # Is old_state not what we want?
            if old_state and old_state not in dependency.acceptable_states:
                log.debug("new state static requires = %s %s %s" %
                          (dependency.stateful_object, old_state,
                           dependency.acceptable_states))
                # Emit some transitions to get depended_on into depended_state
                dep_transition = self._emit_transition_deps(
                    Transition(dependency.stateful_object, old_state,
                               dependency.preferred_state), transition_stack)
                # Record that root_dep depends on depended_on making it into depended_state
                self.edges.add((root_transition, dep_transition))

        # What was depending on our old state?
        # Iterate over all objects which *might* depend on this one
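        # Illustratively: a mounted target depends on its host being lnet_up,
        # so if this host is leaving lnet_up, the dependent target's
        # fix_state tells us which state to move it to (e.g. unmounted) so
        # that its dependencies stay satisfied.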
        for dependent in root_transition.stateful_object.get_dependent_objects():
            if dependent in transition_stack:
                continue

            # What state do we expect the dependent to be in?
            dependent_state = get_mid_transition_expected_state(dependent)
            for dependency in self._dep_cache.get(dependent,
                                                  dependent_state).all():
                if (dependency.stateful_object
                        == root_transition.stateful_object
                        and not root_transition.new_state
                        in dependency.acceptable_states):
                    assert dependency.fix_state is not None, (
                        "A reverse dependency must provide a fix_state: %s in state %s depends on %s in state %s"
                        % (dependent, dependent_state,
                           root_transition.stateful_object,
                           dependency.acceptable_states))

                    if hasattr(dependency.fix_state, "__call__"):
                        fix_state = dependency.fix_state(
                            root_transition.new_state)
                    else:
                        fix_state = dependency.fix_state

                    log.debug(
                        "Reverse dependency: %s in state %s required %s to be in state %s (but will be %s), fixing by setting it to state %s"
                        % (
                            dependent,
                            dependent_state,
                            root_transition.stateful_object.id,
                            dependency.acceptable_states,
                            root_transition.new_state,
                            fix_state,
                        ))

                    dep_transition = self._emit_transition_deps(
                        Transition(dependent, dependent_state, fix_state),
                        transition_stack)
                    self.edges.add((root_transition, dep_transition))

    def command_run_jobs(self, job_dicts, message):
        assert len(job_dicts) > 0

        jobs = []
        for job in job_dicts:
            job_klass = ContentType.objects.get_by_natural_key(
                "chroma_core", job["class_name"].lower()).model_class()
            job_instance = job_klass(**job["args"])
            jobs.append(job_instance)

        with transaction.atomic():
            command = Command.objects.create(message=message)
            log.debug("command_run_jobs: command %s" % command.id)
            for job in jobs:
                log.debug("command_run_jobs:  job %s" % job)
            self.add_jobs(jobs, command)

        return command.id

    def command_set_state(self, object_ids, message, command=None):
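        # object_ids is a list of (content_type_natural_key, pk, new_state)
        # tuples, e.g. (("chroma_core", "managedhost"), 42, "lnet_up");
        # the model and state here are illustrative.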
        if not command:
            command = Command.objects.create(message=message)
        for ct_nk, o_pk, state in object_ids:
            model_klass = ContentType.objects.get_by_natural_key(
                *ct_nk).model_class()
            instance = model_klass.objects.get(pk=o_pk)
            self._set_state(instance, state, command)

        log.info("Created command %s (%s) with %s jobs" %
                 (command.id, command.message, command.jobs.count()))
        if command.jobs.count() == 0:
            command.complete(False, False)

        return command

    def command_run_jobs_preserve_states(self, job_dicts, preserve_objects,
                                         message):
        """
        :param job_dicts: jobs to run
        :param preserve_objects: list of objects whose state should be restored (preserved) after the jobs have run.
        :param message: Message for the command being run.
        :return: id of command that contains the jobs.
        """

        command_id = self.command_run_jobs(job_dicts, message)
        self.command_set_state(
            [(ContentType.objects.get_for_model(
                type(object)).natural_key(), object.id, object.state)
             for object in preserve_objects],
            None,
            Command.objects.get(id=command_id),
        )

        return command_id
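
Taken together, a caller drives CommandPlan roughly like this. The sketch below is illustrative rather than project code: the job class name, its args, and the target state are hypothetical, and lock_cache / job_collection are whatever the scheduler already constructed.

plan = CommandPlan(lock_cache, job_collection)

# Run explicitly-requested jobs: each dict names a Job class and the kwargs
# for its constructor (the same shape command_run_jobs consumes above).
command_id = plan.command_run_jobs(
    [{"class_name": "RebootHostJob", "args": {"host_id": 42}}],
    "Rebooting host",
)

# Or declare desired end states and let the plan compute the job graph
# needed to reach them.
command = plan.command_set_state(
    [(("chroma_core", "managedhost"), 42, "lnet_up")],
    "Starting LNet",
)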