class Service(ChromaService):
    """Service that owns a JobScheduler and the threads that feed it.

    ``run()`` creates and starts the child threads and then blocks until
    ``stop()`` has shut them all down; ``stop()`` may be called from another
    thread at any time after construction.
    """

    def __init__(self):
        super(Service, self).__init__()
        # Set by run() once every child thread that stop() touches has been
        # created and started.
        self._children_started = threading.Event()
        # Set by stop() after all child threads have been joined; run()
        # blocks on it.
        self._complete = threading.Event()

    def run(self):
        """Start all child threads, then block until stop() completes.

        After being released by stop(), asks the job scheduler to cancel
        every job that is not yet in the 'complete' state.
        """
        from chroma_core.services.job_scheduler.job_scheduler import JobScheduler
        from chroma_core.services.job_scheduler.job_scheduler_client import JobSchedulerRpc
        from chroma_core.services.job_scheduler.agent_rpc import AgentRpc

        super(Service, self).run()

        # Cancel anything that's left behind from a previous run
        for command in Command.objects.filter(complete=False):
            command.completed(True, True)
        Job.objects.filter(~Q(state='complete')).update(state='complete', cancelled=True)

        self._job_scheduler = JobScheduler()
        self._queue_thread = ServiceThread(QueueHandler(self._job_scheduler))
        self._rpc_thread = ServiceThread(JobSchedulerRpc(self._job_scheduler))
        self._progress_thread = ServiceThread(self._job_scheduler.progress)
        AgentRpc.start()
        self._queue_thread.start()
        self._rpc_thread.start()
        self._progress_thread.start()

        self._mail_alerts_thread = MailAlerts(settings.EMAIL_SENDER,
                                              settings.EMAIL_SUBJECT_PREFIX,
                                              settings.EMAIL_HOST)
        self._mail_alerts_thread.start()

        # Only signal stop() once *every* thread it will stop and join exists
        # and is running.  Signalling before the mail alerts thread was
        # created let a concurrent stop() hit a missing attribute.
        self._children_started.set()

        self._complete.wait()

        self.log.info("Cancelling outstanding jobs...")

        # Get a fresh view of the job table
        with transaction.commit_manually():
            transaction.commit()

        # Highest id first
        for job in Job.objects.filter(~Q(state='complete')).order_by('-id'):
            self._job_scheduler.cancel_job(job.id)

    def stop(self):
        """Stop and join every child thread, then release run()."""
        from chroma_core.services.job_scheduler.agent_rpc import AgentRpc

        super(Service, self).stop()

        # Guard against trying to stop after child threads are created, but
        # before they are started
        self._children_started.wait()

        AgentRpc.shutdown()

        self.log.info("Stopping...")
        self._rpc_thread.stop()
        self._queue_thread.stop()
        self._progress_thread.stop()
        self._mail_alerts_thread.stop()

        self.log.info("Joining...")
        self._rpc_thread.join()
        self._queue_thread.join()
        self._job_scheduler.join_run_threads()
        self._progress_thread.join()
        self._mail_alerts_thread.join()

        self.log.info("Complete.")
        self._complete.set()
def run(self):
    """Start the forwarder, RPC and host-checker threads, then serve HTTP.

    Blocks in the WSGI server's ``serve_forever()``.  Once that returns,
    every helper thread started here is stopped and then joined.
    """
    super(Service, self).run()

    self.amqp_tx_forwarder = AmqpTxForwarder(self.queues)
    self.amqp_rx_forwarder = AmqpRxForwarder(self.queues)

    # This thread listens to an AMQP queue and appends incoming messages
    # to queues for retransmission to agents
    tx_svc_thread = ServiceThread(self.amqp_tx_forwarder)
    # This thread listens to local queues and appends received messages
    # to an AMQP queue
    rx_svc_thread = ServiceThread(self.amqp_rx_forwarder)
    rx_svc_thread.start()
    tx_svc_thread.start()

    # FIXME: this TERMINATE_ALL format could in principle
    # be passed back from the agent (but it should never
    # originate there), affecting sessions for other agents.

    # At restart, message the receiving services so that they clear out
    # any existing session state (from a previous instance of this
    # service).
    for plugin in ['action_runner']:
        self.queues.receive({
            'fqdn': None,
            'type': 'SESSION_TERMINATE_ALL',
            'plugin': plugin,
            'session_id': None,
            'session_seq': None,
            'body': None
        })

    # This thread services session management RPCs, so that other
    # services can explicitly request a session reset
    session_rpc_thread = ServiceThread(HttpAgentRpc(self))
    session_rpc_thread.start()

    # Hook up the request handler
    MessageView.queues = self.queues
    MessageView.sessions = self.sessions
    MessageView.hosts = self.hosts
    ValidatedClientView.valid_certs = self.valid_certs

    # The thread for generating HostOfflineAlerts
    host_checker_thread = ServiceThread(
        HostStatePoller(self.hosts, self.sessions))
    host_checker_thread.start()

    # The main thread serves incoming requests to exchanges messages
    # with agents, until it is interrupted (gevent handles signals for us)
    self.server = gevent.wsgi.WSGIServer(('', HTTP_AGENT_PORT), WSGIHandler())
    self.server.serve_forever()

    # Signal every helper thread to stop before waiting on any of them.
    # Each thread is joined exactly once; the previous code joined the tx
    # forwarder twice and never waited for the rx forwarder.
    helper_threads = (session_rpc_thread,
                      tx_svc_thread,
                      rx_svc_thread,
                      host_checker_thread)
    for thread in helper_threads:
        thread.stop()
    for thread in helper_threads:
        thread.join()