class StreamConsumer(BaseConsumer):
    """
    A consumer intended to control stream harvests using Supervisor.

    When it receives a harvest start message, it starts a supervisor
    process for harvesting the message.

    When it receives a harvest stop message, it removes the supervisor
    process for the harvest.

    Logs for the supervisor processes are in /var/log/sfm.
    """

    def __init__(self, script, working_path, debug=False, mq_config=None, debug_warcprox=False, tries=3):
        """
        Set up the consumer, its stop-message queues and the harvest supervisor.

        :param script: harvester script handed to HarvestSupervisor.
        :param working_path: working directory, passed to both BaseConsumer
            and HarvestSupervisor.
        :param debug: debug flag forwarded to the supervisor and to each
            started harvest.
        :param mq_config: message queue configuration; its ``queues``,
            ``host``, ``username`` and ``password`` attributes are read here.
        :param debug_warcprox: warcprox debug flag forwarded to each started
            harvest.
        :param tries: number of tries forwarded to each started harvest.
        """
        BaseConsumer.__init__(self, working_path=working_path, mq_config=mq_config)

        # Add routing keys for harvest stop messages.
        # The queue will be unique to this instance of StreamServer so that it
        # will receive all stop requests.
        if mq_config:
            hostname = socket.gethostname()
            # Iterate over a snapshot because new queues are added to the
            # same dict inside the loop.
            for queue, routing_keys in list(mq_config.queues.items()):
                stop_queue = "_".join([queue, hostname])
                # Only the first "start" is rewritten so that a later segment
                # of the routing key that happens to contain "start" is left
                # untouched.
                mq_config.queues[stop_queue] = [
                    routing_key.replace("start", "stop", 1) for routing_key in routing_keys
                ]
            log.debug("Queues are now %s", mq_config.queues)

        self.message = None
        self.debug = debug
        self.debug_warcprox = debug_warcprox
        self.tries = tries
        # NOTE(review): mq_config defaults to None and the queue setup above
        # tolerates that, but the attribute reads below raise AttributeError
        # for None -- confirm whether a missing config is meant to be supported.
        self._supervisor = HarvestSupervisor(script, mq_config.host, mq_config.username, mq_config.password,
                                             working_path, debug=debug, process_owner="sfm")

        # Shutdown Supervisor.
        def shutdown(signal_number, stack_frame):
            log.debug("Shutdown triggered")
            self._supervisor.pause_all()
            # Presumably polled by the BaseConsumer loop -- confirm there.
            self.should_stop = True

        log.debug("Registering shutdown signal")
        signal.signal(signal.SIGTERM, shutdown)
        signal.signal(signal.SIGINT, shutdown)

    def on_message(self):
        """
        Start or remove the supervised harvest named by the current message.

        Reads ``self.message`` and ``self.routing_key`` (presumably populated
        by BaseConsumer before this is called). A routing key beginning with
        ``harvest.start.`` starts the harvest; any other key is treated as a
        stop request.
        """
        harvest_id = self.message["id"]
        if self.routing_key.startswith("harvest.start."):
            # Start
            log.info("Starting %s", harvest_id)
            log.debug("Message for %s is %s", harvest_id, json.dumps(self.message, indent=4))
            self._supervisor.start(self.message, self.routing_key, debug=self.debug,
                                   debug_warcprox=self.debug_warcprox, tries=self.tries)
        else:
            # Stop
            log.info("Stopping %s", harvest_id)
            self._supervisor.remove(harvest_id)
def test_supervisor_start_and_stop(self, mock_server_proxy_class):
    """
    Start a harvest and then remove it, checking files and supervisor calls.

    ``mock_server_proxy_class`` is presumably injected by a patch decorator
    outside this view; each instantiation returns the next mocked proxy.
    """
    message = {
        "id": "test:1",
        "collection_set": {
            "id": "test_collection_set",
        }
    }

    # Setup mocks: four proxies, handed out one per ServerProxy instantiation,
    # each exposing its own mocked ``supervisor`` attribute.
    mock_supervisors = [MagicMock() for _ in range(4)]
    mock_server_proxies = []
    for mock_supervisor in mock_supervisors:
        mock_server_proxy = MagicMock(spec=ServerProxy)
        mock_server_proxy.supervisor = mock_supervisor
        mock_server_proxies.append(mock_server_proxy)
    mock_server_proxy_class.side_effect = mock_server_proxies
    mock_supervisor1, mock_supervisor2, mock_supervisor3, mock_supervisor4 = mock_supervisors

    conf_path = tempfile.mkdtemp()
    log_path = tempfile.mkdtemp()
    # The temp directories are removed in ``finally`` so that a failing
    # assertion does not leave them behind.
    try:
        supervisor = HarvestSupervisor("/opt/sfm/test_harvester.py", "test_host", "test_user", "test_password",
                                       self.working_path, conf_path=conf_path, log_path=log_path, debug=True)

        # Conf_path is empty
        self.assertFalse(os.listdir(conf_path))

        # Start (which calls stop first)
        supervisor.start(message, "harvest.start.test.test_search", debug=False, debug_warcprox=True, tries=4)

        # Seed file contains message.
        with open(os.path.join(conf_path, "test_1.json")) as f:
            seed = json.load(f)
        self.assertDictEqual(message, seed["message"])

        # Conf file as expected
        with open(os.path.join(conf_path, "test_1.conf")) as f:
            conf = f.read()
        # NOTE(review): the expected text is kept exactly as it appears in
        # this view (on a single line); confirm its line breaks against the
        # conf file that HarvestSupervisor actually writes.
        self.assertEqual(
            """[program:test_1] command=python /opt/sfm/test_harvester.py --debug=False --debug-warcprox=True seed {conf_path}/test_1.json {working_path} --streaming --host test_host --username test_user --password test_password --tries 4 user={user} autostart=true autorestart=unexpected exitcodes=0,1 stopwaitsecs=900 stderr_logfile={log_path}/test_1.err.log stdout_logfile={log_path}/test_1.out.log """.format(conf_path=conf_path, log_path=log_path, user=getpass.getuser(), working_path=self.working_path),
            conf)

        # Remove process called
        mock_supervisor1.stopProcess.assert_called_once_with("test_1", True)
        mock_supervisor1.removeProcessGroup.assert_called_once_with("test_1")

        # Reload_config called
        mock_supervisor2.reloadConfig.assert_called_once_with()

        # Add process group called
        mock_supervisor3.addProcessGroup.assert_called_once_with("test_1")

        # Now stop
        supervisor.remove("test:1")

        # Remove process called
        mock_supervisor4.stopProcess.assert_called_once_with("test_1", True)
        mock_supervisor4.removeProcessGroup.assert_called_once_with("test_1")

        # Files deleted
        self.assertFalse(os.path.exists(os.path.join(conf_path, "test_1.json")))
        self.assertFalse(os.path.exists(os.path.join(conf_path, "test_1.conf")))
    finally:
        # ignore_errors keeps a cleanup problem from masking the real failure.
        shutil.rmtree(conf_path, ignore_errors=True)
        shutil.rmtree(log_path, ignore_errors=True)