def run(self):
    """
    Distribute the events from a background thread and yield every task
    result read back from the reply tube.

    This is a generator. Each item produced by
    ``self.beanstalkd_reply.fetch_job`` is passed to
    ``self.tool.update_counters`` and then yielded to the caller.
    ``self.tool.success`` is set to False when no reply arrives within
    DISTRIBUTED_DISPATCHER_TIMEOUT seconds or when any other exception
    is raised while collecting replies. A 'START' report is logged first
    and a 'DONE' report last, whatever the outcome.
    """
    now = time.time()
    self.tool.start_time = now
    self.tool.last_report = now
    self.tool.log_report('START', force=True)

    # Where the replies must be sent back
    reply_destination = dict(addr=self.beanstalkd_reply.addr,
                             tube=self.beanstalkd_reply.tube)
    # pylint: disable=no-member
    sender_thread = threading.Thread(target=self._distribute_events,
                                     args=(reply_destination, ))
    sender_thread.start()

    # Block until the sender thread has given `self.sending` a value.
    # NOTE(review): nothing here checks that the thread is still alive;
    # confirm `_distribute_events` always sets `self.sending`.
    while self.sending is None:
        sleep(0.1)

    # Collect the responses until every event is accounted for
    try:
        while True:
            if self._all_events_are_processed():
                break
            results = self.beanstalkd_reply.fetch_job(
                self._tasks_res_from_res_event,
                timeout=DISTRIBUTED_DISPATCHER_TIMEOUT)
            for result in results:
                self.tool.update_counters(result)
                yield result
            self.tool.log_report('RUN')
    except OioTimeout:
        self.logger.error('No response for %d seconds',
                          DISTRIBUTED_DISPATCHER_TIMEOUT)
        self.tool.success = False
    except Exception:  # pylint: disable=broad-except
        self.logger.exception('ERROR in distributed dispatcher')
        self.tool.success = False

    self.tool.log_report('DONE', force=True)
def handle_running_job(self, job_id, job_type, job_info):
    """
    Resume a running job: start the computation of the total number of
    tasks (when it is still temporary), then start the dispatching of
    the tasks across the platform.

    Each step runs in its own thread, recorded per job ID in
    ``self.compute_total_tasks_threads`` and
    ``self.dispatch_tasks_threads``; a step that already has a recorded
    thread is not started again. Nothing is done when all the tasks have
    already been sent.
    """
    tasks = job_info['tasks']
    if tasks['all_sent']:
        self.logger.info(
            '[job_id=%s] All tasks are already sent', job_id)
        return

    job = JOB_TYPES[job_type](self.conf, logger=self.logger)

    # The job has never made any progress: let it prepare its parameters
    never_started = (tasks['total'] == 0
                     and tasks['is_total_temp']
                     and tasks['sent'] == 0
                     and not tasks['all_sent'])
    if never_started:
        job.prepare(job_info['config']['params'])

    def spawn(target):
        # Start a worker thread for this job and hand it back.
        # NOTE(review): callers record the thread only after start();
        # if the target removes its own entry on exit, confirm a very
        # short run cannot race with the registration.
        worker = threading.Thread(
            target=target, args=(job_id, job_type, job_info, job))
        worker.start()
        return worker

    # Step 1: total number of tasks
    if job_id in self.compute_total_tasks_threads:
        self.logger.info(
            '[job_id=%s] Already computing the total number of tasks',
            job_id)
    elif tasks['is_total_temp']:
        self.compute_total_tasks_threads[job_id] = spawn(
            self.safe_compute_total_tasks)
    else:
        self.logger.info(
            '[job_id=%s] The total number of tasks is already computed',
            job_id)

    # Step 2: dispatching of the tasks
    if job_id in self.dispatch_tasks_threads:
        self.logger.warning(
            '[job_id=%s] Already dispatching the tasks', job_id)
    else:
        self.dispatch_tasks_threads[job_id] = spawn(
            self.safe_dispatch_tasks)
def run_forever(self):
    """
    Take jobs from the queue and spawn threads to dispatch them.

    Starts the thread refreshing the beanstalkd workers and the thread
    listening to the beanstalkd replies, then resumes the unfinished
    jobs listed by the backend for this orchestrator, and finally polls
    the backend for the next job about once per second for as long as
    ``self.running`` is true.

    Returns None. The method returns early when ``self.running`` becomes
    false, or as soon as a backend call made through
    ``self.handle_backend_errors`` reports an error (a warning is
    logged in that case).
    """
    # gather beanstalkd info
    self.refresh_beanstalkd_workers_thread = threading.Thread(
        target=self.refresh_beanstalkd_workers_forever)
    self.refresh_beanstalkd_workers_thread.start()

    # start processing replies
    self.listen_beanstalkd_reply_thread = threading.Thread(
        target=self.listen_beanstalkd_reply_forever)
    self.listen_beanstalkd_reply_thread.start()

    if not self.running:
        return

    # restart running jobs
    self.logger.debug('Look for unfinished jobs')
    orchestrator_jobs, exc = self.handle_backend_errors(
        self.backend.list_orchestrator_jobs, self.orchestrator_id)
    if exc is not None:
        # `warning`, not the deprecated `warn` alias (removed from
        # logging.Logger in Python 3.13)
        self.logger.warning(
            'Unable to list running jobs for this orchestrator: %s', exc)
        return
    for job_info in orchestrator_jobs:
        if not self.running:
            return
        self.safe_handle_running_job(job_info)

    # run next jobs
    while self.running:
        # pause between two polls of the backend
        sleep(1)
        job_info, exc = self.handle_backend_errors(
            self.backend.run_next, self.orchestrator_id)
        if exc is not None:
            self.logger.warning('Unable to run next job: %s', exc)
            return
        if not job_info:
            # no job to run for now
            continue
        self.safe_handle_running_job(job_info)
def _rebuilder_pass(self, **kwargs): self.start_time = self.last_report = time.time() self.log_report('START', force=True) reply = { 'addr': self.beanstalkd_listener.addr, 'tube': self.beanstalkd_listener.tube, 'rebuilder_id': self.rebuilder_id } thread = threading.Thread(target=self._distribute_broken_chunks, args=(reply, ), kwargs=kwargs) thread.start() def is_finish(): total_events = 0 for _, sender in self.beanstalkd_senders.iteritems(): total_events += sender.nb_events return total_events <= 0 while thread.is_alive(): if self.sending: break else: time.sleep(0.1) while thread.is_alive() or not is_finish(): try: event_info = self.beanstalkd_listener.fetch_event( self._rebuilt_chunk_from_event, timeout=DISTRIBUTED_REBUILDER_TIMEOUT, **kwargs) for beanstalkd_addr, chunk, bytes_processed, error \ in event_info: self.beanstalkd_senders[beanstalkd_addr].event_done() self.update_processed(chunk, bytes_processed, error=error, **kwargs) self.log_report('RUN', **kwargs) except OioTimeout: self.logger.error("No response since %d secondes", DISTRIBUTED_REBUILDER_TIMEOUT) self.log_report('DONE', force=True) return False self.log_report('DONE', force=True) return self.total_errors == 0