def process_jobs(self, ch, method, properties, jobrecord):
    '''Work out dependencies and order.

    Loads the incoming job record, registers it with this dispatcher,
    acknowledges registration back to the client over the reply queue,
    then submits the job's first runnable tasks.
    '''
    # Build a Job instance from the raw record and register it locally.
    job = Job().load(jobrecord)
    self._register_job(job)
    self.log.info('Registered job: {0}'.format(job.state.id))

    # Acknowledge registration to the client: reply with the job id,
    # correlated to the client's original request.
    reply_props = pika.BasicProperties(correlation_id=properties.correlation_id)
    self.channel.basic_publish(exchange='',
                               routing_key=properties.reply_to,
                               properties=reply_props,
                               body=str(job.state.id))

    # Decompose the job into the first wave of runnable tasks.
    self.log.debug('Decomposing job; calculating first tasks to run')
    first_tasks = job.get_next_tasks_to_run()

    # Fill out any id args in tasks right at the beginning.
    for task in first_tasks:
        if task.state.args is None:
            continue
        # NOTE(review): the rebound local is never read back, so this
        # presumably mutates the task in place — confirm in update_task_args.
        task = job.update_task_args(task)

    # Mark each first task submitted and hand it to the task queue.
    for task in first_tasks:
        task.state.status = 'SUBMITTED'
        self.publish_task(task.state.save())
def process_jobs(self, ch, method, properties, jobrecord):
    '''Work out dependencies and order.

    Loads the job record, persists it to the DB, acknowledges
    registration back to the client, then submits the job's first
    runnable tasks — arming a caretaker timer for any task that
    carries a timeout — and finally writes the updated job back.
    '''
    job = Job().load(jobrecord)

    # Persist the job in the DB rather than only in dispatcher memory.
    self._persist_job(job)
    self.log.info('Registered job: {0} in DB'.format(job.state.id))

    # Reply to the client with the job id, correlated to its request.
    ack_props = pika.BasicProperties(correlation_id=properties.correlation_id)
    self.channel.basic_publish(exchange='',
                               routing_key=properties.reply_to,
                               properties=ack_props,
                               body=str(job.state.id))

    # Decompose the job into the first wave of runnable tasks.
    self.log.debug('Decomposing job; calculating first tasks to run')
    initial_tasks = job.get_next_tasks_to_run()

    # Fill out any id args in tasks right at the beginning.
    for task in initial_tasks:
        if task.state.args is None:
            continue
        # NOTE(review): the rebound local is never read back, so this
        # presumably mutates the task in place — confirm in update_task_args.
        task = job.update_task_args(task)

    for task in initial_tasks:
        task.state.status = 'SUBMITTED'
        # Stamp a start time if the task does not already carry one.
        if not task.state.start_time:
            task.state.start_time = time.time()
        # Tasks with a timeout get a one-shot timer that fires the
        # caretaker, which fails any task past its deadline.
        if task.state.timeout:
            self.log.debug("Task {0} timeout in {1}".format(task.state.id, str(task.state.timeout)))
            threading.Timer(task.state.timeout, self._caretaker).start()
        self.publish_task(task.state.save())

    # Now we've decided what to do with the job, update the DB.
    self.log.debug("Updating to DB job: {0}".format(job.state.id))
    self._update_job(job)
def _caretaker(self):
    '''Scan the DB for incomplete tasks and fail any past its timeout.

    Invoked by the per-task threading.Timer armed in process_jobs.
    Serialized by self.ct_lock so only one caretaker sweep runs at a
    time. A task is failed when its timeout is smaller than the time
    elapsed since its start_time; its parent job is failed with it.
    '''
    self.log.info("Caretaker waiting on lock...")
    self.ct_lock.acquire()
    # FIX: release the lock in a finally block. Previously any exception
    # from the DB calls below left ct_lock held forever, deadlocking
    # every subsequent caretaker timer.
    try:
        self.log.info("Caretaker Running...")
        dbI = self.db(self.databaseHost, self.databasePort, self.databaseInstance)
        incomplete = dbI.getincompletetasks()
        for task_id in incomplete:
            jobid = dbI.getjobid(task_id)
            jobrecord = dbI.getjob(jobid)  # TODO get task record not job record
            job = Job().load(jobrecord)
            for task in job.state.tasks:
                # Only consider the matching task, and only when it has
                # both a timeout and a recorded start time.
                if task.state.id == task_id and task.state.timeout and task.state.start_time:
                    if task.state.timeout < (time.time() - task.state.start_time):
                        self.log.info("Setting task {0} from job {1} as FAILED".format(task.state.id, job.state.id))
                        task.state.status = 'FAILED'
                        task.state.end_time = time.time()
                        # A timed-out task fails its whole job.
                        job.state.status = 'FAILED'
                        job.state.end_time = time.time()
                        job.save()
                        dbI.updatejob(job)
    finally:
        self.ct_lock.release()
def process_results(self, ch, method, properties, taskrecord):
    '''Processing results received from workers and end points.

    Two cases, selected by the message's correlation_id:
    * a job id known to this dispatcher — merge the task result into
      that job (force=True) and start its next task;
    * the literal 'update_task' — look the parent job up via the
      task id, merge the update into the matching task, then start
      the job's next task.
    Unmatched messages are logged and dropped.
    '''
    self.log.info(
        'Received task results for job {0}'.format(
            properties.correlation_id))
    # FIX: look the job up once. The original called _retreive_job twice
    # for the same correlation_id (once for the None test, once for the
    # value) — a redundant DB round-trip.
    jobrecord = self._retreive_job(properties.correlation_id)
    if jobrecord is not None:
        # Re-register the job with the dispatcher from its stored record.
        job = Job().load(jobrecord)
        self.log.info('Found job in DB: {0}'.format(job.state.id))
        self.log.info('Task results: {0}'.format(taskrecord))
        # Turn the taskrecord into a project Task instance and merge it in.
        updated_task = Task().load(taskrecord)
        job.update_tasks(updated_task, force=True)
        self._start_next_task(job)
    elif properties.correlation_id == 'update_task':
        self.log.info('Task results: {0}'.format(taskrecord))
        # Turn the taskrecord into a project Task instance.
        updated_task = Task().load(taskrecord)
        number_of_updated_tasks = 0
        # Resolve the parent job through the task id.
        jobid = self._retreive_jobid(updated_task.state.id)
        jobrecord = self._retreive_job(jobid)
        if jobrecord is not None:
            job = Job().load(jobrecord)
            self.log.info('Found in DB job: {0}'.format(job.state.id))
            for task in job.state.tasks:
                if updated_task.state.id == task.state.id:
                    job.update_tasks(updated_task)
                    number_of_updated_tasks += 1
            self._start_next_task(job)
            if number_of_updated_tasks == 0:
                # NOTE(review): self.log.warn is kept as-is; if self.log is a
                # stdlib Logger, .warning is the non-deprecated spelling.
                self.log.warn('Task with id {0} not found in its parent job (possible?)'.format(
                    updated_task.state.id))
        else:
            self.log.warn('No parent job found for Task with id {0}'.format(
                updated_task.state.id))
    else:
        self.log.warn('No job found for job ID: {0}'.format(properties.correlation_id))