def __init__(self, next_id=1, max_retired_jobs=1200, on_background_state_change=None): """ Parameters ---------- next_id : int The next Job ID to assign max_retired_jobs : int See attribute of same name. on_background_state_change : function See attribute of same name. """ # The next job ID to assign self._next_id = next_id self._on_background_state_change = on_background_state_change # The job queue which manages the scheduling and # allocation of all jobs. self._job_queue = JobQueue(self._job_queue_on_allocate, self._job_queue_on_free, self._job_queue_on_cancel) # The machines available. # {name: Machine, ...} self._machines = OrderedDict() # The jobs which are currently queued or allocated. # {id: _Job, ...} self._jobs = OrderedDict() # Stores the reasons that jobs have been destroyed, e.g. freed or # killed. This may be periodically cleared. Up to # _max_retired_jobs jobs are retained (after which their # entry in this dict is removed). # {id: reason, ...} self._max_retired_jobs = max_retired_jobs self._retired_jobs = OrderedDict() # Underlying sets containing changed jobs and machines self._changed_jobs = set() self._changed_machines = set() # All the attributes set below are "dynamic state" and cannot be # pickled. They are initialised by calling to _init_dynamic_state and # cleared by calling _del_dynamic_state. # The lock which must be held when manipulating any internal state self._lock = None # The connections to BMPs in the system. # {machine_name: {(c, f): AsyncBMPController, ...}, ...} self._bmp_controllers = None self._init_dynamic_state()
class Controller(object): """An object which allocates jobs to machines and manages said machines' hardware. This object is intended to form the core of a server which manages the queueing and execution of jobs on several SpiNNaker machines at once using a :py:class:`~spalloc_server.job_queue.JobQueue` and interacts with the hardware of said machines using :py:class:`~spalloc_server.async_bmp_controller.AsyncBMPController`. 'Jobs' may be created using the :py:meth:`.create_job` and are allocated a unique ID. Jobs are then queued, allocated and destroyed according to machine availability and user intervention. The state of a job may be queried using methods such as :py:meth:`.get_job_state`. When a job changes state it is added to the :py:attr:`.changed_jobs` set. If a job's state is changed due to a background process (rather than in response to calling a :py:class:`.Controller` method), :py:attr:`.on_background_state_change` is called. :py:class:`~spalloc_server.job_queue.JobQueue` calls callbacks in this object when queued jobs are allocated to machines (:py:meth:`._job_queue_on_allocate`), allocations are freed (:py:meth:`._job_queue_on_free`) or cancelled without being allocated (:py:meth:`._job_queue_on_cancel`). These callback functions implement the bulk of the functionality of this object by recording state changes in jobs and triggering the sending of power/link commands to SpiNNaker machines. Machines may be added, modified and removed at any time by modifying the :py:attr:`.machines` attribute. If a machine is removed or changes significantly, jobs running on the machine are cancelled, otherwise existing jobs should continue to execute or be scheduled on any new machines as appropriate. Finally, once the controller is shut down (and outstanding BMP commands are flushed) using :py:meth:`.stop` and :py:meth:`.join` methods, it may be :py:mod:`pickled <pickle>` and later unpickled to resume operation of the controller from where it left off before it was shut down. Users should, at a regular interval call :py:meth:`.destroy_timed_out_jobs` in order to destroy any queued or running jobs which have not been kept alive recently enough. Unless otherwise indicated, all methods are thread safe. Attributes ---------- max_retired_jobs : int Maximum number of retired jobs to retain the state of. machines : {name: \ :py:class:`~spalloc_server.configuration.Machine`, ...} \ or similar OrderedDict Defines the machines now available to the controller. changed_jobs : set([job_id, ...]) The set of job_ids whose state has changed since the last time this set was accessed. Reading this value clears it. changed_machines : set([machine_name, ...]) The set of machine names whose state has changed since the last time this set was accessed. Reading this value clears it. For example, machines are marked as changed if their tags are changed, if they are added or removed or if a job is allocated or freed on them. on_background_state_change : function() or None A function which is called (from any thread) when any state changes occur in a background process and not as a direct result of calling a method of the controller. The callback function *must not* call any methods of the controller object. Note that this attribute is not pickled and unpicking a controller sets this attribute to None. """ def __init__(self, next_id=1, max_retired_jobs=1200, on_background_state_change=None): """ Parameters ---------- next_id : int The next Job ID to assign max_retired_jobs : int See attribute of same name. on_background_state_change : function See attribute of same name. """ # The next job ID to assign self._next_id = next_id self._on_background_state_change = on_background_state_change # The job queue which manages the scheduling and # allocation of all jobs. self._job_queue = JobQueue(self._job_queue_on_allocate, self._job_queue_on_free, self._job_queue_on_cancel) # The machines available. # {name: Machine, ...} self._machines = OrderedDict() # The jobs which are currently queued or allocated. # {id: _Job, ...} self._jobs = OrderedDict() # Stores the reasons that jobs have been destroyed, e.g. freed or # killed. This may be periodically cleared. Up to # _max_retired_jobs jobs are retained (after which their # entry in this dict is removed). # {id: reason, ...} self._max_retired_jobs = max_retired_jobs self._retired_jobs = OrderedDict() # Underlying sets containing changed jobs and machines self._changed_jobs = set() self._changed_machines = set() # All the attributes set below are "dynamic state" and cannot be # pickled. They are initialised by calling to _init_dynamic_state and # cleared by calling _del_dynamic_state. # The lock which must be held when manipulating any internal state self._lock = None # The connections to BMPs in the system. # {machine_name: {(c, f): AsyncBMPController, ...}, ...} self._bmp_controllers = None self._init_dynamic_state() def __getstate__(self): """Called when pickling this object. This object may only be pickled once :py:meth:`.stop` and :py:meth:`.join` have returned. """ state = self.__dict__.copy() # Do not keep the reference to any state-change callbacks state["_on_background_state_change"] = None # Do not keep references to unpickleable dynamic state state["_bmp_controllers"] = None state["_lock"] = None return state def __setstate__(self, state): """Called when unpickling this object. Note that though the object must be pickled when stopped, the unpickled object will start running immediately. """ self.__dict__.update(state) # Restore callback function pointers in JobQueue (removed by JobQueue # when pickling as Python 2.7 cannot reliably pickle method # references). self._job_queue.on_allocate = self._job_queue_on_allocate self._job_queue.on_free = self._job_queue_on_free self._job_queue.on_cancel = self._job_queue_on_cancel self._init_dynamic_state() def stop(self): """Request that all background threads stop. This will cause all outstanding BMP commands to be flushed. .. warning:: Apart from :py:meth:`.join`, no methods of this controller object may be called once this method has been called. See Also -------- join: to wait for the threads to actually terminate. """ # Stop the BMP controllers for machine in self._machines: for controller in itervalues(self._bmp_controllers[machine]): controller.stop() def join(self): """Block until all background threads have halted and all queued BMP commands completed. """ # Wait for the BMP controller threads for controllers in itervalues(self._bmp_controllers): for controller in itervalues(controllers): controller.join() @property def on_background_state_change(self): with self._lock: return self._on_background_state_change @on_background_state_change.setter def on_background_state_change(self, value): with self._lock: self._on_background_state_change = value @property def max_retired_jobs(self): with self._lock: return self._max_retired_jobs @max_retired_jobs.setter def max_retired_jobs(self, value): with self._lock: self._max_retired_jobs = value while len(self._retired_jobs) > self._max_retired_jobs: self._retired_jobs.pop(next(iter(self._retired_jobs))) @property def machines(self): with self._lock: return self._machines.copy() @machines.setter def machines(self, machines): """Update the set of machines available to the controller. Attempt to update the information about available machines without destroying jobs where possible. Machines are matched with existing machines by name and are only recreated if dimensions or connectivity information is altered. Note that changing the tags, set of dead boards or set of dead links does not destroy any already-allocated jobs but will influence new ones. This function blocks while any removed machine's BMP controllers are shut down. This helps prevent collisions e.g. when renaming a machine. Parameters ---------- machines : {name: \ :py:class:`~spalloc_server.configuration.Machine`, \ ...} or similar OrderedDict Defines the machines now available to the controller. """ shut_down_controllers = list() with self._lock: before = set(self._machines) after = set(machines) # Match old machines with new ones by name added = after - before removed = before - after changed = before.intersection(after) # Filter the set of 'changed' machines, ignoring machines which # have not changed and marking machines with major changes for # re-creation. for name in changed.copy(): old = self._machines[name] new = machines[name] if old == new: # Machine has not changed, ignore it changed.remove(name) elif (old.name != new.name or # Not really needed old.width != new.width or old.height != new.height or old.board_locations != new.board_locations or old.bmp_ips != new.bmp_ips or old.spinnaker_ips != new.spinnaker_ips): # Machine has changed in a major way, recreate it changed.remove(name) removed.add(name) added.add(name) # Make all changes to the job queue atomically to prevent jobs # getting scheduled on machines which then immediately change. with self._job_queue: # Remove all removed machines, accumulating a list of all the # AsyncBMPControllers which have been shut down. for name in removed: # Remove the machine from the queue causing all jobs # allocated on it to be freed and all boards powered down. self._job_queue.remove_machine(name) # Remove the board and its BMP connections old = self._machines.pop(name) shut_down_controllers.extend( itervalues(self._bmp_controllers.pop(name))) # Shut-down the now defunct controllers for controller in shut_down_controllers: controller.stop() def wait_for_old_controllers_to_shutdown(): # All new BMPControllers must wait for all the old # controllers to shut-down first for controller in shut_down_controllers: controller.join() # Update changed machines for name in changed: new = machines[name] self._job_queue.modify_machine(name, tags=new.tags, dead_boards=new.dead_boards, dead_links=new.dead_links) self._machines[name] = new # Add new machines for name in added: new = machines[name] self._machines[name] = new self._create_machine_bmp_controllers( new, wait_for_old_controllers_to_shutdown) self._job_queue.add_machine(name, width=new.width, height=new.height, tags=new.tags, dead_boards=new.dead_boards, dead_links=new.dead_links) # Re-order machines to match the specification for name in machines: # Python 2.7 does not have move_to_end m = self._machines.pop(name) self._machines[name] = m self._job_queue.move_machine_to_end(name) # Mark all effected machines as changed self._changed_machines.update(added) self._changed_machines.update(changed) self._changed_machines.update(removed) @property def changed_jobs(self): with self._lock: changed_jobs = self._changed_jobs self._changed_jobs = set() return changed_jobs @property def changed_machines(self): with self._lock: changed_machines = self._changed_machines self._changed_machines = set() return changed_machines def create_job(self, *args, **kwargs): """Create a new job (i.e. allocation of boards). This function is a wrapper around :py:meth:`JobQueue.create_job() <spalloc_server.job_queue.JobQueue.create_job>` which automatically selects (and returns) a new job_id. As such, the following *additional* (keyword) arguments are accepted: Parameters ---------- owner : str **Required.** The name of the owner of this job. keepalive : float or None *Optional.* The maximum number of seconds which may elapse between a query on this job before it is automatically destroyed. If None, no timeout is used. (Default: 60.0) Returns ------- job_id : int The Job ID assigned to the job. """ with self._lock: # Extract non-allocator arguments owner = kwargs.pop("owner", None) if owner is None: raise TypeError("owner must be specified for all jobs.") keepalive = kwargs.pop("keepalive", 60.0) # Generate a job ID job_id = self._next_id self._next_id += 1 kwargs["job_id"] = job_id # Create job and begin attempting to allocate it job = _Job(id=job_id, owner=owner, keepalive=keepalive, args=args, kwargs=kwargs) self._jobs[job_id] = job self._job_queue.create_job(*args, **kwargs) self._changed_jobs.add(job_id) return job_id def job_keepalive(self, job_id): """Reset the keepalive timer for the specified job. Note all other job-specific functions implicitly call this method. """ with self._lock: job = self._jobs.get(job_id, None) if job is not None and job.keepalive is not None: job.keepalive_until = time.time() + job.keepalive def get_job_state(self, job_id): """Poll the state of a running job. Returns ------- :py:class:`.JobStateTuple` """ with self._lock: self.job_keepalive(job_id) job = self._jobs.get(job_id) if job is not None: # Job is live state = job.state power = job.power keepalive = job.keepalive reason = None start_time = job.start_time elif job_id in self._retired_jobs: # Job has been destroyed at some point state = JobState.destroyed power = None keepalive = None reason = self._retired_jobs[job_id] start_time = None else: # Job ID not recognised state = JobState.unknown power = None keepalive = None reason = None start_time = None return JobStateTuple(state, power, keepalive, reason, start_time) def get_job_machine_info(self, job_id): """Get information about the machine the job has been allocated. Returns ------- :py:class:`.JobMachineInfoTuple` """ with self._lock: self.job_keepalive(job_id) job = self._jobs.get(job_id, None) if job is not None and job.boards is not None: return JobMachineInfoTuple( job.width, job.height, job.connections, job.allocated_machine.name, job.boards) else: # Job doesn't exist or no boards allocated yet return JobMachineInfoTuple(None, None, None, None, None) def power_on_job_boards(self, job_id): """Power on (or reset if already on) boards associated with a job.""" with self._lock: self.job_keepalive(job_id) job = self._jobs.get(job_id) if job is not None and job.boards is not None: self._set_job_power_and_links( job, power=True, link_enable=False) def power_off_job_boards(self, job_id): """Power off boards associated with a job.""" with self._lock: self.job_keepalive(job_id) job = self._jobs.get(job_id) if job is not None and job.boards is not None: self._set_job_power_and_links( job, power=False, link_enable=None) def destroy_job(self, job_id, reason=None): """Destroy a job. When the job is finished, or to terminate it early, this function releases any resources consumed by the job and removes it from any queues. Parameters ---------- reason : str or None *Optional.* A human-readable string describing the reason for the job's destruction. """ with self._lock: job = self._jobs.get(job_id, None) if job is not None: # Free the boards used by the job (the JobQueue will then call # _job_queue_on_free which will trigger power-down and removal # of the job from self._jobs). self._job_queue.destroy_job(job_id, reason) def list_jobs(self): """Enumerate all current jobs. Returns ------- jobs : [:py:class`.JobTuple`, ...] A list of allocated/queued jobs in order of creation from oldest (first) to newest (last). """ with self._lock: job_list = [] for job in itervalues(self._jobs): # Strip "job_id" which is only used internally kwargs = {k: v for k, v in iteritems(job.kwargs) if k != "job_id"} # Machine may not exist allocated_machine_name = None if job.allocated_machine is not None: allocated_machine_name = job.allocated_machine.name job_list.append(JobTuple( job.id, job.owner, job.start_time, job.keepalive, job.state, job.power, job.args, kwargs, allocated_machine_name, job.boards)) return job_list def list_machines(self): """Enumerates all machines known to the system. Returns ------- machines : [:py:class:`.MachineTuple`, ...] The list of machines known to the system in order of priority from highest (first) to lowest (last). """ with self._lock: return [ MachineTuple(machine.name, machine.tags, machine.width, machine.height, machine.dead_boards, machine.dead_links) for machine in itervalues(self._machines) ] def get_board_position(self, machine_name, x, y, z): """Get the physical location of a specified board. Parameters ---------- machine_name : str The name of the machine containing the board. x, y, z : int The logical board location within the machine. Returns ------- (cabinet, frame, board) or None The physical location of the board at the specified location or None if the machine/board are not recognised. """ with self._lock: machine = self._machines.get(machine_name, None) if machine is None: return None else: return machine.board_locations.get((x, y, z), None) def get_board_at_position(self, machine_name, cabinet, frame, board): """Get the logical location of a board at the specified physical location. Parameters ---------- machine_name : str The name of the machine containing the board. cabinet, frame, board : int The physical board location within the machine. Returns ------- (x, y, z) or None The logical location of the board at the specified location or None if the machine/board are not recognised. """ with self._lock: machine = self._machines.get(machine_name, None) if machine is None: return None else: # NB: Assuming this function is only called very rarely, # constructing and maintaining a reverse lookup is not worth # the trouble so instead we just search. for (x, y, z), (c, f, b) in iteritems(machine.board_locations): if (c, f, b) == (cabinet, frame, board): return (x, y, z) else: # No board found return None def where_is(self, **kwargs): """Find out where a SpiNNaker board or chip is located, logically and physically. May be called in one of the following styles:: >>> # Query by logical board coordinate within a machine. >>> where_is(machine=..., x=..., y=..., z=...) >>> # Query by physical board location within a machine. >>> where_is(machine=..., cabinet=..., frame=..., board=...) >>> # Query by chip coordinate (as if the machine were booted as >>> # one large machine). >>> where_is(machine=..., chip_x=..., chip_y=...) >>> # Query by chip coordinate, within the boards allocated to a >>> # job. >>> where_is(job_id=..., chip_x=..., chip_y=...) Returns ------- {"machine": ..., "logical": ..., "physical": ..., "chip": ..., \ "board_chip": ..., "job_chip": ..., "job_id": ...} or None If a board exists at the supplied location, a dictionary giving the location of the board/chip, supplied in a number of alternative forms. If the supplied coordinates do not specify a specific chip, the chip coordinates given are those of the Ethernet connected chip on that board. If no board exists at the supplied position, None is returned instead. ``machine`` gives the name of the machine containing the board. ``logical`` the logical board coordinate, (x, y, z) within the machine. ``physical`` the physical board location, (cabinet, frame, board), within the machine. ``chip`` the coordinates of the chip, (x, y), if the whole machine were booted as a single machine. ``board_chip`` the coordinates of the chip, (x, y), within its board. ``job_id`` is the job ID of the job currently allocated to the board identified or None if the board is not allocated to a job. ``job_chip`` the coordinates of the chip, (x, y), within its job, if a job is allocated to the board or None otherwise. """ with self._lock: # Initially, we normalise the input coordinate into: # # machine_name, chip_x, chip_y # # and then convert this back into all the output formats required. # At various points, if we encounter a board/job/chip which doesn't # exist we'll drop out. keywords = set(kwargs) if keywords == set("machine x y z".split()): # Covert from logical position machine_name = kwargs["machine"] chip_x, chip_y = board_to_chip( kwargs["x"], kwargs["y"], kwargs["z"]) elif keywords == set("machine cabinet frame board".split()): # Covert from physical position (fail if location does not # exist) machine_name = kwargs["machine"] xyz = self.get_board_at_position(machine_name, kwargs["cabinet"], kwargs["frame"], kwargs["board"]) if xyz is None: return None chip_x, chip_y = board_to_chip(*xyz) elif keywords == set("machine chip_x chip_y".split()): # Covert from chip location machine_name = kwargs["machine"] chip_x = kwargs["chip_x"] chip_y = kwargs["chip_y"] elif keywords == set("job_id chip_x chip_y".split()): # Covert from job-relative chip location job = self._jobs.get(kwargs["job_id"], None) if job is None or job.boards is None: return None machine_name = job.allocated_machine.name job_x, job_y, job_z = map(min, zip(*job.boards)) dx, dy = board_to_chip(job_x, job_y, job_z) chip_x = kwargs["chip_x"] + dx chip_y = kwargs["chip_y"] + dy # NB: We double-check later that this coordinate is actually a # board within the boards allocated to the job! else: raise TypeError( "Invalid arguments: {}".format(", ".join(keywords))) # Get the actual Machine machine = self._machines.get(machine_name, None) if machine is None: return None # Compensate chip coordinates for wrap-around chip_w, chip_h = triad_dimensions_to_chips( self._machines[machine_name].width, self._machines[machine_name].height, WrapAround.both) chip_x %= chip_w chip_y %= chip_h # Determine the chip within the board # Workaround: spinn5_chip_coord (until at least Rig 0.13.2) returns # numpy integer types which are not JSON serialiseable. board_chip_x, board_chip_y = map( int, spinn5_chip_coord(chip_x, chip_y)) # Determine the logical board coordinates (and compensate for # wrap-around) x, y, z = chip_to_board(chip_x, chip_y, chip_w, chip_h) # Determine the board's physical location (fail if board does not # exist) cfb = self.get_board_position(machine_name, x, y, z) if cfb is None: return None cabinet, frame, board = cfb # Determine what job is running on that board for job_id, job in iteritems(self._jobs): # NB: If machine is defined, boards must also be defined. if (job.allocated_machine == machine and (x, y, z) in job.boards): # Found the job break else: # No job is allocated to the board job_id = None job = None # If selected by job, make sure the board found is actually running # that job (this won't be the case, e.g. if a user specifies a # board within their machine which is actually dead or allocated to # a neighbouring job) if "job_id" in kwargs and job_id != kwargs["job_id"]: return None # Determine chip coordinate within job if job is not None: # Determine the board coordinate within the job job_x, job_y, job_z = map(min, zip(*job.boards)) job_x = x - job_x job_y = y - job_y job_z = z - job_z # Turn that into a chip coordinate and wrap-around according to # the boards actually available in the allocated machine job_chip_x, job_chip_y = board_to_chip(job_x, job_y, job_z) job_chip = ((job_chip_x + board_chip_x) % job.width, (job_chip_y + board_chip_y) % job.height) else: job_chip = None return { "machine": machine_name, "logical": (x, y, z), "physical": (cabinet, frame, board), "chip": (chip_x, chip_y), "board_chip": (board_chip_x, board_chip_y), "job_id": job_id, "job_chip": job_chip, } def destroy_timed_out_jobs(self): """Destroy any jobs which have timed out.""" with self._lock: now = time.time() for job in list(itervalues(self._jobs)): if (job.keepalive is not None and job.keepalive_until < now): # Job timed out, destroy it self.destroy_job(job.id, "Job timed out.") def _bmp_on_request_complete(self, job, success): """Callback function called by an AsyncBMPController when it completes a previously issued request. This function sets the specified Job's state to JobState.ready when this function has been called job.bmp_requests_until_ready times. This function should be passed partially-called with the job the callback is associated it. Parameters ---------- job : :py:class:`._Job` The job whose state should be set. (To be defined by wrapping this method in a partial). success : bool Command success indicator provided by the AsyncBMPController. """ with self._lock: # If a BMP command failed, cancel the job if not success: self.destroy_job( job.id, "Machine configuration failed, please try again later.") # Count down the number of outstanding requests before the job is # ready job.bmp_requests_until_ready -= 1 assert job.bmp_requests_until_ready >= 0 if job.bmp_requests_until_ready == 0: job.state = JobState.ready # Report state changes for jobs which are still running if job.id in self._jobs: self._changed_jobs.add(job.id) if self._on_background_state_change is not None: self._on_background_state_change() def _set_job_power_and_links(self, job, power, link_enable=None): """Power on/off and configure links for the boards associated with a specific job. Parameters ---------- job : :py:class:`._Job` The job whose boards should be controlled. power : bool The power state to apply to the boards. True = on, False = off. link_enable : bool or None Whether to enable (True) or disable (False) peripheral links or leave them unchanged (None). """ with self._lock: machine = job.allocated_machine on_done = partial(self._bmp_on_request_complete, job) # Group commands by the frame they interact with to allow all # commands within a frame to be sent atomically frame_commands = defaultdict(list) controllers = self._bmp_controllers[machine.name] # Power commands job.bmp_requests_until_ready += len(job.boards) for xyz in job.boards: c, f, b = machine.board_locations[xyz] controller = controllers[(c, f)] frame_commands[controller].append( partial(controller.set_power, b, power, on_done)) # Link state commands if link_enable is not None: job.bmp_requests_until_ready += len(job.periphery) for x, y, z, link in job.periphery: c, f, b = machine.board_locations[(x, y, z)] controller = controllers[(c, f)] frame_commands[controller].append( partial(controller.set_link_enable, b, link, link_enable, on_done)) # Send power/link commands atomically for each frame for controller, commands in iteritems(frame_commands): with controller: for command in commands: command() # Update job state job.state = JobState.power job.power = power self._changed_jobs.add(job.id) def _job_queue_on_allocate(self, job_id, machine_name, boards, periphery, torus): """Called when a job is successfully allocated to a machine.""" with self._lock: # Update job metadata job = self._jobs[job_id] job.allocated_machine = self._machines[machine_name] job.boards = boards job.periphery = periphery job.torus = torus self._changed_jobs.add(job.id) self._changed_machines.add(machine_name) # Compute dimensions of machine the job will run on. Note that the # formulae used below for converting from board to chip coordinates # is only valid when either 'oz' is zero or only a single board is # allocated. Since we only allocate multi-board regions by the # triad this will be the case. ox, oy, oz = min(job.boards) # Origin bx, by, bz = max(job.boards) # Top-right bound # Get system bounds in chips if len(job.boards) > 1: job.width, job.height = triad_dimensions_to_chips((bx-ox) + 1, (by-oy) + 1, job.torus) else: # Special case: single board allocations are always 8x8 job.width = job.height = 8 # Get SpiNNaker chip Ethernet IPs (enumerated in terms of chip # coordinates) job.connections = { board_to_chip(x-ox, y-oy, z-oz): job.allocated_machine.spinnaker_ips[(x, y, z)] for (x, y, z) in job.boards } # Initialise the boards self.power_on_job_boards(job_id) def _job_queue_on_free(self, job_id, reason): """Called when a job is freed.""" self._changed_machines.add(self._jobs[job_id].allocated_machine.name) self._teardown_job(job_id, reason) def _job_queue_on_cancel(self, job_id, reason): """Called when a job is cancelled before having been allocated.""" self._teardown_job(job_id, "Cancelled: {}".format(reason or "")) def _teardown_job(self, job_id, reason): """Called once job has been removed from the JobQueue. Powers down any hardware in use and finally removes the job from _jobs. """ with self._lock: job = self._jobs.pop(job_id) self._retired_jobs[job_id] = reason self._changed_jobs.add(job.id) # Keep the number of retired jobs limited to prevent # accumulating memory consumption forever. if len(self._retired_jobs) > self._max_retired_jobs: self._retired_jobs.pop(next(iter(self._retired_jobs))) # Power-down any boards that were in use if job.boards is not None: self._set_job_power_and_links(job, power=False) def _create_machine_bmp_controllers(self, machine, on_thread_start=None): """Create BMP controllers for a machine.""" with self._lock: controllers = {} for (c, f), hostname in iteritems(machine.bmp_ips): controllers[(c, f)] = AsyncBMPController( hostname, on_thread_start) self._bmp_controllers[machine.name] = controllers def _init_dynamic_state(self): """Initialise all dynamic (non-pickleable) state. Specifically: * Creates the global controller lock * Creates connections to BMPs. * Reset keepalive_until on all existing jobs (e.g. allowing remote devices a chance to reconnect before terminating their jobs). """ # Recreate the lock assert self._lock is None self._lock = threading.RLock() with self._lock: # Create connections to BMPs assert self._bmp_controllers is None self._bmp_controllers = {} for machine in itervalues(self._machines): self._create_machine_bmp_controllers(machine) # Reset keepalives to allow remote clients time to reconnect for job_id in self._jobs: self.job_keepalive(job_id)
def q(on_allocate, on_free, on_cancel): # A job queue with some mock callbacks return JobQueue(on_allocate, on_free, on_cancel)