Exemplo n.º 1
0
class StreamConsumer(BaseConsumer):
    """
    A consumer intended to control stream harvests using Supervisor.

    When it receives a harvest start message, it starts a supervisor
    process for harvesting the message.

    When it receives a harvest stop message, it removes the supervisor process
    for the harvest.

    Logs for the supervisor processes are in /var/log/sfm.
    """

    def __init__(self, script, working_path, debug=False, mq_config=None, debug_warcprox=False, tries=3):
        """
        Set up the consumer and the HarvestSupervisor it delegates to.

        :param script: forwarded to HarvestSupervisor as its first argument.
        :param working_path: forwarded to both BaseConsumer and
            HarvestSupervisor.
        :param debug: forwarded to HarvestSupervisor and to every
            supervisor start call.
        :param mq_config: message queue configuration. Its ``queues`` mapping
            is extended in place with one host-specific stop queue per
            existing queue; ``host``, ``username`` and ``password`` are read
            from it.
        :param debug_warcprox: forwarded to every supervisor start call.
        :param tries: forwarded to every supervisor start call.
        """
        BaseConsumer.__init__(self, working_path=working_path, mq_config=mq_config)
        # Add routing keys for harvest stop messages
        # The queue will be unique to this instance of StreamServer so that it
        # will receive all stop requests
        if mq_config:
            # Iterate over a snapshot: inserting keys into a dict while
            # iterating its live items() view raises RuntimeError on Python 3.
            for queue, routing_keys in list(mq_config.queues.items()):
                mq_config.queues["_".join([queue, socket.gethostname()])] = [routing_key.replace("start", "stop")
                                                                             for routing_key in routing_keys]
            log.debug("Queues are now %s", mq_config.queues)

        self.message = None
        self.debug = debug
        self.debug_warcprox = debug_warcprox
        self.tries = tries
        # NOTE(review): mq_config defaults to None but is dereferenced here
        # unconditionally, so a config is effectively required - confirm.
        self._supervisor = HarvestSupervisor(script, mq_config.host, mq_config.username, mq_config.password,
                                             working_path, debug=debug, process_owner="sfm")

    def on_message(self):
        """
        Dispatch the current message to the supervisor.

        A routing key beginning with "harvest.start." starts the harvest
        described by ``self.message``; any other routing key is treated as a
        stop request for the harvest identified by the message's "id".
        """
        harvest_id = self.message["id"]
        if self.routing_key.startswith("harvest.start."):
            # Start
            log.info("Starting %s", harvest_id)
            log.debug("Message for %s is %s", harvest_id, json.dumps(self.message, indent=4))
            self._supervisor.start(self.message, self.routing_key, debug=self.debug, debug_warcprox=self.debug_warcprox,
                                   tries=self.tries)
        else:
            # Stop
            log.info("Stopping %s", harvest_id)
            self._supervisor.stop(harvest_id)
Exemplo n.º 2
0
    def __init__(self, script, working_path, debug=False, mq_config=None, debug_warcprox=False, tries=3):
        """
        Initialise the consumer, create its HarvestSupervisor and install
        handlers for SIGTERM and SIGINT.

        For every queue in ``mq_config.queues`` a companion queue named
        ``<queue>_<hostname>`` is added, bound to the same routing keys with
        "start" replaced by "stop".
        """
        BaseConsumer.__init__(self, working_path=working_path, mq_config=mq_config)

        # The stop queue name carries this host's name so that it is unique
        # to this instance of StreamServer and receives all stop requests.
        if mq_config:
            for name, start_keys in list(mq_config.queues.items()):
                stop_queue = "_".join([name, socket.gethostname()])
                mq_config.queues[stop_queue] = [key.replace("start", "stop") for key in start_keys]
            log.debug("Queues are now %s", mq_config.queues)

        self.message = None
        self.debug = debug
        self.debug_warcprox = debug_warcprox
        self.tries = tries
        self._supervisor = HarvestSupervisor(script, mq_config.host, mq_config.username, mq_config.password,
                                             working_path, debug=debug, process_owner="sfm")

        def _on_shutdown_signal(signum, frame):
            # Pause every supervised process, then flag the consumer to stop.
            log.debug("Shutdown triggered")
            self._supervisor.pause_all()
            self.should_stop = True

        log.debug("Registering shutdown signal")

        # Same handler for both termination signals, SIGTERM first.
        for signum in (signal.SIGTERM, signal.SIGINT):
            signal.signal(signum, _on_shutdown_signal)
Exemplo n.º 3
0
    def __init__(self, script, working_path, debug=False, mq_config=None, debug_warcprox=False, tries=3):
        """
        Set up the consumer and the HarvestSupervisor it delegates to.

        :param script: forwarded to HarvestSupervisor as its first argument.
        :param working_path: forwarded to both BaseConsumer and
            HarvestSupervisor.
        :param debug: stored on the instance and forwarded to
            HarvestSupervisor.
        :param mq_config: message queue configuration. Its ``queues`` mapping
            is extended in place with one host-specific stop queue per
            existing queue; ``host``, ``username`` and ``password`` are read
            from it.
        :param debug_warcprox: stored on the instance.
        :param tries: stored on the instance.
        """
        BaseConsumer.__init__(self, working_path=working_path, mq_config=mq_config)
        # Add routing keys for harvest stop messages
        # The queue will be unique to this instance of StreamServer so that it
        # will receive all stop requests
        if mq_config:
            # Iterate over a snapshot: inserting keys into a dict while
            # iterating its live items() view raises RuntimeError on Python 3.
            for queue, routing_keys in list(mq_config.queues.items()):
                mq_config.queues["_".join([queue, socket.gethostname()])] = [routing_key.replace("start", "stop")
                                                                             for routing_key in routing_keys]
            log.debug("Queues are now %s", mq_config.queues)

        self.message = None
        self.debug = debug
        self.debug_warcprox = debug_warcprox
        self.tries = tries
        # NOTE(review): mq_config defaults to None but is dereferenced here
        # unconditionally, so a config is effectively required - confirm.
        self._supervisor = HarvestSupervisor(script, mq_config.host, mq_config.username, mq_config.password,
                                             working_path, debug=debug, process_owner="sfm")
Exemplo n.º 4
0
class StreamConsumer(BaseConsumer):
    """
    Consumer that drives stream harvests through Supervisor.

    A harvest start message causes a supervisor process to be started for
    that message. Any other message is handled as a harvest stop, and the
    supervisor process for the harvest is removed.

    Logs for the supervisor processes are in /var/log/sfm.
    """
    def __init__(self, script, working_path, debug=False, mq_config=None, debug_warcprox=False, tries=3):
        """
        Initialise the consumer, create its HarvestSupervisor and install
        handlers for SIGTERM and SIGINT.

        For every queue in ``mq_config.queues`` a companion queue named
        ``<queue>_<hostname>`` is added, bound to the same routing keys with
        "start" replaced by "stop".
        """
        BaseConsumer.__init__(self, working_path=working_path, mq_config=mq_config)

        # The stop queue name carries this host's name so that it is unique
        # to this instance of StreamServer and receives all stop requests.
        if mq_config:
            for name, start_keys in list(mq_config.queues.items()):
                stop_queue = "_".join([name, socket.gethostname()])
                mq_config.queues[stop_queue] = [key.replace("start", "stop") for key in start_keys]
            log.debug("Queues are now %s", mq_config.queues)

        self.message = None
        self.debug = debug
        self.debug_warcprox = debug_warcprox
        self.tries = tries
        self._supervisor = HarvestSupervisor(script, mq_config.host, mq_config.username, mq_config.password,
                                             working_path, debug=debug, process_owner="sfm")

        def _on_shutdown_signal(signum, frame):
            # Pause every supervised process, then flag the consumer to stop.
            log.debug("Shutdown triggered")
            self._supervisor.pause_all()
            self.should_stop = True

        log.debug("Registering shutdown signal")

        # Same handler for both termination signals, SIGTERM first.
        for signum in (signal.SIGTERM, signal.SIGINT):
            signal.signal(signum, _on_shutdown_signal)

    def on_message(self):
        """
        Route the current message to the supervisor: start the harvest when
        the routing key begins with "harvest.start.", otherwise remove it.
        """
        harvest_id = self.message["id"]

        if not self.routing_key.startswith("harvest.start."):
            # Anything that is not a start request is handled as a stop.
            log.info("Stopping %s", harvest_id)
            self._supervisor.remove(harvest_id)
            return

        log.info("Starting %s", harvest_id)
        pretty_message = json.dumps(self.message, indent=4)
        log.debug("Message for %s is %s", harvest_id, pretty_message)
        self._supervisor.start(self.message, self.routing_key, debug=self.debug,
                               debug_warcprox=self.debug_warcprox, tries=self.tries)
Exemplo n.º 5
0
    def test_supervisor_start_and_stop(self, mock_server_proxy_class):
        message = {
            "id": "test:1",
            "collection_set": {
                "id": "test_collection_set",
            }
        }

        conf_path = tempfile.mkdtemp()
        log_path = tempfile.mkdtemp()

        # Setup mocks
        mock_server_proxy1 = MagicMock(spec=ServerProxy)
        mock_supervisor1 = MagicMock()
        mock_server_proxy1.supervisor = mock_supervisor1
        mock_server_proxy2 = MagicMock(spec=ServerProxy)
        mock_supervisor2 = MagicMock()
        mock_server_proxy2.supervisor = mock_supervisor2
        mock_server_proxy3 = MagicMock(spec=ServerProxy)
        mock_supervisor3 = MagicMock()
        mock_server_proxy3.supervisor = mock_supervisor3
        mock_server_proxy4 = MagicMock(spec=ServerProxy)
        mock_supervisor4 = MagicMock()
        mock_server_proxy4.supervisor = mock_supervisor4

        # Return mock_twarc when instantiating a twarc.
        mock_server_proxy_class.side_effect = [mock_server_proxy1, mock_server_proxy2, mock_server_proxy3,
                                               mock_server_proxy4]

        supervisor = HarvestSupervisor("/opt/sfm/test_harvester.py", "test_host", "test_user", "test_password",
                                       self.working_path, conf_path=conf_path, log_path=log_path, debug=True)

        # Conf_path is empty
        self.assertFalse(os.listdir(conf_path))

        # Start (which calls stop first)
        supervisor.start(message, "harvest.start.test.test_search", debug=False, debug_warcprox=True, tries=4)

        # Seed file contains message.
        with open(os.path.join(conf_path, "test_1.json")) as f:
            seed = json.load(f)
        self.assertDictEqual(message, seed["message"])

        # Conf file as expected
        with open(os.path.join(conf_path, "test_1.conf")) as f:
            conf = f.read()
        self.assertEqual("""[program:test_1]
command=python /opt/sfm/test_harvester.py --debug=False --debug-warcprox=True seed {conf_path}/test_1.json {working_path} --streaming --host test_host --username test_user --password test_password --tries 4
user={user}
autostart=true
autorestart=unexpected
exitcodes=0,1
stopwaitsecs=900
stderr_logfile={log_path}/test_1.err.log
stdout_logfile={log_path}/test_1.out.log
""".format(conf_path=conf_path, log_path=log_path, user=getpass.getuser(), working_path=self.working_path), conf)

        # Remove process called
        mock_supervisor1.stopProcess.assert_called_once_with("test_1", True)
        mock_supervisor1.removeProcessGroup.assert_called_once_with("test_1")

        # Reload_config called
        mock_supervisor2.reloadConfig.assert_called_once_with()

        # Add process group called
        mock_supervisor3.addProcessGroup.assert_called_once_with("test_1")

        # Now stop
        supervisor.stop("test:1")
        # Remove process called
        mock_supervisor4.stopProcess.assert_called_once_with("test_1", True)
        mock_supervisor4.removeProcessGroup.assert_called_once_with("test_1")

        # Files deleted
        self.assertFalse(os.path.exists(os.path.join(conf_path, "test_1.json")))
        self.assertFalse(os.path.exists(os.path.join(conf_path, "test_1.conf")))

        shutil.rmtree(conf_path)
        shutil.rmtree(log_path)
Exemplo n.º 6
0
    def test_supervisor_start_and_stop(self, mock_server_proxy_class):
        message = {
            "id": "test:1",
            "collection_set": {
                "id": "test_collection_set",
            }
        }

        conf_path = tempfile.mkdtemp()
        log_path = tempfile.mkdtemp()

        # Setup mocks
        mock_server_proxy1 = MagicMock(spec=ServerProxy)
        mock_supervisor1 = MagicMock()
        mock_server_proxy1.supervisor = mock_supervisor1
        mock_server_proxy2 = MagicMock(spec=ServerProxy)
        mock_supervisor2 = MagicMock()
        mock_server_proxy2.supervisor = mock_supervisor2
        mock_server_proxy3 = MagicMock(spec=ServerProxy)
        mock_supervisor3 = MagicMock()
        mock_server_proxy3.supervisor = mock_supervisor3
        mock_server_proxy4 = MagicMock(spec=ServerProxy)
        mock_supervisor4 = MagicMock()
        mock_server_proxy4.supervisor = mock_supervisor4

        # Return mock_twarc when instantiating a twarc.
        mock_server_proxy_class.side_effect = [
            mock_server_proxy1, mock_server_proxy2, mock_server_proxy3,
            mock_server_proxy4
        ]

        supervisor = HarvestSupervisor("/opt/sfm/test_harvester.py",
                                       "test_host",
                                       "test_user",
                                       "test_password",
                                       self.working_path,
                                       conf_path=conf_path,
                                       log_path=log_path,
                                       debug=True)

        # Conf_path is empty
        self.assertFalse(os.listdir(conf_path))

        # Start (which calls stop first)
        supervisor.start(message,
                         "harvest.start.test.test_search",
                         debug=False,
                         debug_warcprox=True,
                         tries=4)

        # Seed file contains message.
        with open(os.path.join(conf_path, "test_1.json")) as f:
            seed = json.load(f)
        self.assertDictEqual(message, seed["message"])

        # Conf file as expected
        with open(os.path.join(conf_path, "test_1.conf")) as f:
            conf = f.read()
        self.assertEqual(
            """[program:test_1]
command=python /opt/sfm/test_harvester.py --debug=False --debug-warcprox=True seed {conf_path}/test_1.json {working_path} --streaming --host test_host --username test_user --password test_password --tries 4
user={user}
autostart=true
autorestart=unexpected
exitcodes=0,1
stopwaitsecs=900
stderr_logfile={log_path}/test_1.err.log
stdout_logfile={log_path}/test_1.out.log
""".format(conf_path=conf_path,
           log_path=log_path,
           user=getpass.getuser(),
           working_path=self.working_path), conf)

        # Remove process called
        mock_supervisor1.stopProcess.assert_called_once_with("test_1", True)
        mock_supervisor1.removeProcessGroup.assert_called_once_with("test_1")

        # Reload_config called
        mock_supervisor2.reloadConfig.assert_called_once_with()

        # Add process group called
        mock_supervisor3.addProcessGroup.assert_called_once_with("test_1")

        # Now stop
        supervisor.remove("test:1")
        # Remove process called
        mock_supervisor4.stopProcess.assert_called_once_with("test_1", True)
        mock_supervisor4.removeProcessGroup.assert_called_once_with("test_1")

        # Files deleted
        self.assertFalse(os.path.exists(os.path.join(conf_path,
                                                     "test_1.json")))
        self.assertFalse(os.path.exists(os.path.join(conf_path,
                                                     "test_1.conf")))

        shutil.rmtree(conf_path)
        shutil.rmtree(log_path)