async def handle_agent_job_done(self, agent_addr, message: AgentJobDone):
    """
    Process an AgentJobDone message received from an agent.

    The result is forwarded to the client whenever the agent is registered,
    even when the job is not listed as running, so that everything can
    recover in case of problems. The queue is updated in every case.

    :param agent_addr: address of the agent that sent the message
    :param message: the AgentJobDone message; ``message.job_id`` is indexed
        as ``[0]`` (used as the client address for the reply) and ``[1]``
        (the job id sent back to the client)
    """
    job_id = message.job_id

    # Results coming from agents we do not know are only logged.
    if agent_addr not in self._registered_agents:
        self._logger.warning("Job result %s %s from non-registered agent %s", job_id[0], job_id[1], agent_addr)
        await self.update_queue()
        return

    if job_id not in self._job_running:
        self._logger.warning("Job result %s %s from agent %s was not running", job_id[0], job_id[1], agent_addr)
    else:
        self._logger.info("Job %s %s finished on agent %s", job_id[0], job_id[1], agent_addr)
        # The job is over: forget it and make the agent available again.
        del self._job_running[job_id]
        self._available_agents.append(agent_addr)

    # Forward the result to the client, whether or not the job was known.
    reply = BackendJobDone(job_id[1], message.result, message.grade, message.problems,
                           message.tests, message.custom, message.state, message.archive,
                           message.stdout, message.stderr)
    await ZMQUtils.send_with_addr(self._client_socket, job_id[0], reply)

    # A slot may have been freed, so let the queue move forward.
    await self.update_queue()
async def handle_agent_job_done(self, agent_addr, message: AgentJobDone):
    """
    Process an AgentJobDone message received from an agent.

    When the agent is registered and the job is listed as running, the job is
    removed from the running jobs, the agent is made available again and the
    result is sent to the client address stored with the running job.
    Otherwise only a warning is logged. The queue is updated in every case.

    :param agent_addr: address of the agent that sent the message
    :param message: the AgentJobDone message carrying the job id and results
    """
    job_id = message.job_id

    if agent_addr not in self._registered_agents:
        self._logger.warning("Job result %s from non-registered agent %s", job_id, agent_addr)
    elif job_id in self._job_running:
        self._logger.info("Job %s finished on agent %s", job_id, agent_addr)

        # Forget the job, keeping its entry to know which client to answer.
        finished_job = self._job_running.pop(job_id)

        # The agent can take new work from now on.
        self._available_agents.append(agent_addr)

        reply = BackendJobDone(job_id, message.result, message.grade, message.problems,
                               message.tests, message.custom, message.state, message.archive,
                               message.stdout, message.stderr)
        await ZMQUtils.send_with_addr(self._client_socket, finished_job.client_addr, reply)
    else:
        self._logger.warning("Job result %s from agent %s was not running", job_id, agent_addr)

    # A slot may have been freed, so let the queue move forward.
    await self.update_queue()
async def handle_client_new_job(self, client_addr, message: ClientNewJob):
    """
    Process a ClientNewJob message: enqueue the job and trigger a queue update.

    If a job with the same id is already waiting or running, the client is
    immediately answered with a "crash" result ("Duplicate job id") and
    nothing is enqueued.

    :param client_addr: address of the client that sent the message
    :param message: the ClientNewJob message describing the job
    """
    job_id = message.job_id

    is_duplicate = job_id in self._waiting_jobs or job_id in self._job_running
    if is_duplicate:
        self._logger.info(
            "Client %s asked to add a job with id %s to the queue, but it's already inside. "
            "Duplicate random id, message repeat are possible causes, "
            "and both should be inprobable at best.",
            client_addr, job_id)
        rejection = BackendJobDone(job_id, ("crash", "Duplicate job id"), 0.0, {}, {}, {}, "", None, "", "")
        await ZMQUtils.send_with_addr(self._client_socket, client_addr, rejection)
        return

    self._logger.info("Adding a new job %s %s to the queue", client_addr, job_id)

    # Register the job by id first, then push it into the priority queue.
    waiting_job = WaitingJob(message.priority, time.time(), client_addr, job_id, message)
    self._waiting_jobs[job_id] = waiting_job

    # The priority queue is keyed by environment type, environment and SSH flag.
    queue_key = (message.environment_type,
                 message.environment,
                 message.environment_parameters["ssh_allowed"])
    self._waiting_jobs_pq.put(queue_key, waiting_job)

    await self.update_queue()
async def _handle_job_abort(self, job_id: str, task, callback, ssh_callback):
    """
    Report a job as aborted by feeding a synthetic "crash" result to
    ``_handle_job_done``.

    :param job_id: id of the aborted job
    :param task: forwarded unchanged to ``_handle_job_done``
    :param callback: forwarded unchanged to ``_handle_job_done``
    :param ssh_callback: forwarded unchanged to ``_handle_job_done``
    """
    crash_result = ("crash", "Backend unavailable, retry later")
    aborted = BackendJobDone(job_id, crash_result, 0.0, {}, {}, {}, "", None, "", "")
    await self._handle_job_done(aborted, task, callback, ssh_callback)
async def handle_client_kill_job(self, client_addr, message: ClientKillJob):
    """
    Process a ClientKillJob message.

    A job that is still waiting is dropped and the client receives a "killed"
    result. A job that is already running has the kill request relayed to the
    agent running it. An unknown job only produces a warning.

    :param client_addr: address of the client that sent the message
    :param message: the ClientKillJob message; jobs are looked up with the
        key ``(client_addr, message.job_id)``
    """
    job_key = (client_addr, message.job_id)

    # Case 1: the job has not started yet.
    if job_key in self._waiting_jobs:
        waiting_entry = self._waiting_jobs.pop(job_key)
        # Blank the last slot so the job reference kept by the priority queue is
        # erased (presumably the entry is a mutable list shared with it - confirm).
        waiting_entry[-1] = None

        # The client still expects a JobDone for this job.
        killed = BackendJobDone(message.job_id, ("killed", "You killed the job"),
                                0.0, {}, {}, {}, "", None, "", "")
        await ZMQUtils.send_with_addr(self._client_socket, client_addr, killed)
        return

    # Case 2: the job is running, so the agent in charge has to be told.
    if job_key in self._job_running:
        agent_addr = self._job_running[job_key][0]
        await ZMQUtils.send_with_addr(self._agent_socket, agent_addr, BackendKillJob(job_key))
        return

    # Case 3: nothing is known about this job.
    self._logger.warning("Client %s attempted to kill unknown job %s", str(client_addr), str(message.job_id))
async def _recover_jobs(self):
    """
    Recover the jobs that were sent to an agent that is no longer registered.

    Each such job is reported to its client as a "crash" result
    ("Agent restarted") and removed from the running jobs. The queue is
    updated afterwards.
    """
    # Work on a snapshot: entries are deleted from the dict during the loop.
    running_snapshot = list(self._job_running.items())

    for (client_addr, job_id), (agent_addr, _job_msg, _) in reversed(running_snapshot):
        # Jobs whose agent is still registered are left alone.
        if agent_addr in self._registered_agents:
            continue

        crashed = BackendJobDone(job_id, ("crash", "Agent restarted"),
                                 0.0, {}, {}, {}, "", None, None, None)
        await ZMQUtils.send_with_addr(self._client_socket, client_addr, crashed)
        del self._job_running[(client_addr, job_id)]

    await self.update_queue()
async def handle_agent_job_done(self, agent_addr, message: AgentJobDone):
    """
    Handle an AgentJobDone message: send the data back to the client and
    start a new job if needed.

    A result for a job that is not listed as running (for example a repeated
    message) no longer raises ``KeyError``. It is logged as a warning, the
    result is still forwarded to the client so that it can recover, and the
    agent is not added to the available agents a second time.

    :param agent_addr: address of the agent that sent the message
    :param message: the AgentJobDone message; ``message.job_id`` is indexed
        as ``[0]`` (used as the client address for the reply) and ``[1]``
        (the job id sent back to the client)
    """
    job_id = message.job_id
    job_was_running = job_id in self._job_running

    if job_was_running:
        self._logger.debug("job %s %s finished on agent %s", job_id[0], job_id[1], agent_addr)
        # Remove the job from the list of running jobs
        del self._job_running[job_id]
    else:
        # An unconditional `del` would raise KeyError here and abort the
        # handler before the client is answered and the queue is updated.
        self._logger.warning("Job result %s %s from agent %s was not running",
                             job_id[0], job_id[1], agent_addr)

    # Send the data back to the client, even if the job was unknown
    await ZMQUtils.send_with_addr(
        self._client_socket, job_id[0],
        BackendJobDone(job_id[1], message.result, message.grade, message.problems,
                       message.tests, message.custom, message.archive,
                       message.stdout, message.stderr))

    # The agent is available now. Only do this when it really finished a
    # known job, otherwise it could end up listed as available twice.
    if job_was_running:
        self._available_agents.append(agent_addr)

    # update the queue
    await self.update_queue()