def test_that_mgmt_messages_work(self):
    """
    A `MgmtMessage` built from a list of three frames exposes them as
    `topic`, `identity` and `data`, serializes back to the same list and
    compares equal to a message parsed from its own serialization.
    """
    parts = ("me", "myself", "and i")
    topic, identity, data = parts

    # hand over a fresh list so the expectation below is independent of it
    msg = MgmtMessage(list(parts))

    self.assertEqual(topic, msg.topic)
    self.assertEqual(identity, msg.identity)
    self.assertEqual(data, msg.data)

    # round trip: serialize and parse again
    self.assertEqual(list(parts), msg.serialize())
    self.assertEqual(msg, MgmtMessage(msg.serialize()))
def assert_expected_result_and_stop(raw_msg):
    """
    Check that the received curi has `CURI_EXTRACTION_FINISHED` set to
    `CURI_OPTIONAL_TRUE`, then publish the worker QUIT management message
    on the `master_pub` socket.
    """
    received = DataMessage(raw_msg)
    finished_flag = received.curi.optional_vars[CURI_EXTRACTION_FINISHED]
    self.assertEqual(CURI_OPTIONAL_TRUE, finished_flag)

    quit_msg = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
        data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
    self._mgmt_sockets['master_pub'].send_multipart(quit_msg.serialize())
def assert_expected_result_and_stop(raw_msg):
    """
    Expect a 304 status code together with an empty content body, then
    publish the worker QUIT management message on the `master_pub` socket.
    """
    received = DataMessage(raw_msg)
    self.assertEqual(304, received.curi.status_code)
    self.assertEqual("", received.curi.content_body)

    quit_msg = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
        data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
    self._mgmt_sockets['master_pub'].send_multipart(quit_msg.serialize())
def handle_shutdown_signal(_sig, _frame):
    """
    Signal handler: hand a serialized QUIT message to `quit_worker` and
    start the io_loop again afterwards.

    Both handler arguments are ignored.
    """
    quit_request = MgmtMessage(data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
    quit_worker(quit_request.serialize())

    # zmq 2.1 stops blocking calls, restart the ioloop
    io_loop.start()
def assert_expected_result_and_stop(raw_msg):
    """
    Compare the received content body with the `static/robots.txt` file
    that lives next to this module, then publish the worker QUIT
    management message on the `master_pub` socket.
    """
    msg = DataMessage(raw_msg)

    robots_path = os.path.join(os.path.dirname(__file__),
        "static/robots.txt")
    # read inside a `with` block so the file handle is closed right away
    # instead of being left to the garbage collector
    with open(robots_path) as robots_file:
        robots = robots_file.read()

    self.assertEqual(robots, msg.curi.content_body)

    death = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
        data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
    self._mgmt_sockets['master_pub'].send_multipart(death.serialize())
def publish(self, topic=None, identity=None, data=None):
    """
    Publish a message to the intended audience.

    `topic` and `data` are asserted to be set; `identity` is passed on as
    given. The resulting `MgmtMessage` is serialized and sent as a
    multipart message on `self._out_stream`.
    """
    assert topic is not None
    assert data is not None

    outgoing = MgmtMessage(topic=topic, identity=identity, data=data)
    frames = outgoing.serialize()
    self._out_stream.send_multipart(frames)
def assert_expected_result_and_stop(raw_msg):
    """
    Check that the received content body equals the `static/robots.txt`
    file stored next to this module, then publish the worker QUIT
    management message on the `master_pub` socket.
    """
    msg = DataMessage(raw_msg)

    # the context manager closes the file as soon as it has been read;
    # a bare `open(...).read()` would leave the handle open
    with open(os.path.join(os.path.dirname(__file__),
            "static/robots.txt")) as robots_file:
        robots = robots_file.read()

    self.assertEqual(robots, msg.curi.content_body)

    death = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
        data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
    self._mgmt_sockets['master_pub'].send_multipart(death.serialize())
def quit_worker(raw_msg):
    """
    When the worker should quit, stop the io_loop after 2 seconds.

    A QUIT request is additionally answered with a QUIT_ACK message that
    carries this worker's identity. Messages with other data are ignored.
    """
    request = MgmtMessage(raw_msg)

    if ZMQ_SPYDER_MGMT_WORKER_QUIT == request.data:
        logger.info("process::We have been asked to shutdown, do so")

        # give pending work 2000 ms before the loop is stopped
        delayed_stop = DelayedCallback(io_loop.stop, 2000, io_loop)
        delayed_stop.start()

        reply = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
            identity=identity, data=ZMQ_SPYDER_MGMT_WORKER_QUIT_ACK)
        mgmt._out_stream.send_multipart(reply.serialize())
def test_that_creating_mgmt_works(self):
    """
    Create the worker management via
    `workerprocess.create_worker_management`, publish a QUIT message to
    it and close every stream and socket before terminating the context.
    """
    ctx = zmq.Context()
    io_loop = IOLoop.instance()

    def stop_looping(_msg):
        # registered as management callback: ends the `io_loop.start()` below
        io_loop.stop()

    settings = Settings()
    settings.ZEROMQ_MASTER_PUSH = 'inproc://spyder-zmq-master-push'
    settings.ZEROMQ_WORKER_PROC_FETCHER_PULL = \
        settings.ZEROMQ_MASTER_PUSH
    settings.ZEROMQ_MASTER_SUB = 'inproc://spyder-zmq-master-sub'
    settings.ZEROMQ_WORKER_PROC_EXTRACTOR_PUB = \
        settings.ZEROMQ_MASTER_SUB

    settings.ZEROMQ_MGMT_MASTER = 'inproc://spyder-zmq-mgmt-master'
    settings.ZEROMQ_MGMT_WORKER = 'inproc://spyder-zmq-mgmt-worker'

    # the test plays the master side of the management channel
    pubsocket = ctx.socket(zmq.PUB)
    pubsocket.bind(settings.ZEROMQ_MGMT_MASTER)
    pub_stream = ZMQStream(pubsocket, io_loop)

    subsocket = ctx.socket(zmq.SUB)
    subsocket.setsockopt(zmq.SUBSCRIBE, "")
    subsocket.bind(settings.ZEROMQ_MGMT_WORKER)
    sub_stream = ZMQStream(subsocket, io_loop)

    mgmt = workerprocess.create_worker_management(settings, ctx, io_loop)
    mgmt.add_callback(ZMQ_SPYDER_MGMT_WORKER, stop_looping)
    mgmt.start()

    def assert_quit_message(raw_msg):
        # `on_recv` hands over the raw list of frames, so it has to be
        # parsed into a `MgmtMessage` before `.data` can be inspected
        msg = MgmtMessage(raw_msg)
        self.assertEqual(ZMQ_SPYDER_MGMT_WORKER_QUIT_ACK, msg.data)

    sub_stream.on_recv(assert_quit_message)

    death = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
        data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
    pub_stream.send_multipart(death.serialize())

    # blocks until `stop_looping` has been called
    io_loop.start()

    # every stream/socket is closed before the context is terminated
    mgmt._out_stream.close()
    mgmt._in_stream.close()
    mgmt._publisher.close()
    mgmt._subscriber.close()
    pub_stream.close()
    pubsocket.close()
    sub_stream.close()
    subsocket.close()
    ctx.term()
def test_simple_mgmt_session(self):
    """
    Run a small management session: register callbacks, publish a test
    message and verify the QUIT_ACK answer before the callbacks are
    removed again.
    """
    mgmt = ZmqMgmt(self._worker_sub, self._worker_pub,
        io_loop=self._io_loop)
    mgmt.start()

    # registering the string "test" as a callback has to raise
    self.assertRaises(ValueError, mgmt.add_callback, "test", "test")

    mgmt.add_callback(self._topic, self.call_me)
    mgmt.add_callback(ZMQ_SPYDER_MGMT_WORKER, self.on_end)

    opening = MgmtMessage(topic=self._topic, data='test'.encode())
    self._master_pub.send_multipart(opening.serialize())

    def assert_correct_mgmt_answer(raw_msg):
        answer = MgmtMessage(raw_msg)
        self.assertEqual(ZMQ_SPYDER_MGMT_WORKER_QUIT_ACK, answer.data)

        # once both callbacks are gone the registry has to be empty
        mgmt.remove_callback(self._topic, self.call_me)
        mgmt.remove_callback(ZMQ_SPYDER_MGMT_WORKER, self.on_end)
        self.assertEqual({}, mgmt._callbacks)

    self._master_sub.on_recv(assert_correct_mgmt_answer)

    self._io_loop.start()
def echo_processing(self, crawl_uri):
    """
    Publish the worker QUIT management message on the `master_pub` socket
    and return `crawl_uri` unchanged.
    """
    quit_msg = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
        data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
    self._mgmt_sockets['master_pub'].send_multipart(quit_msg.serialize())

    return crawl_uri
def echo_processing(self, data_message, out_socket):
    """
    Publish the worker QUIT management message on the `master_pub` socket,
    then send the serialized `data_message` on `out_socket`.
    """
    quit_msg = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
        data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
    self._mgmt_sockets['master_pub'].send_multipart(quit_msg.serialize())

    # echo: the incoming message goes out again as it came in
    out_socket.send_multipart(data_message.serialize())
def echo_processing(self, data_message, out_socket):
    """
    Send the worker QUIT management message via the `master_pub` socket
    and afterwards send the serialized `data_message` on `out_socket`.
    """
    shutdown_request = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
        data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
    self._mgmt_sockets["master_pub"].send_multipart(
        shutdown_request.serialize())

    out_socket.send_multipart(data_message.serialize())
def call_me(self, msg):
    """
    Callback for the test topic: verify topic and payload of `msg`, then
    publish the worker QUIT management message on `self._master_pub`.
    """
    self.assertEqual(self._topic, msg.topic)
    self.assertEqual('test'.encode(), msg.data)

    quit_msg = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
        data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
    self._master_pub.send_multipart(quit_msg.serialize())
def main(settings):
    """
    The :meth:`main` method for worker processes.

    Here we will:

    - create a :class:`ZmqMgmt` instance

    - create a :class:`Fetcher` instance

    - initialize and instantiate the extractor chain

    The `settings` have to be loaded already.

    The call blocks in the io_loop until the worker is asked to quit (via
    a management QUIT message or SIGINT/SIGTERM). Afterwards the fetcher,
    the extractor, the management interface and the logging socket are
    closed and the ZeroMQ context is terminated.
    """
    # create my own identity
    identity = "worker:%s:%s" % (socket.gethostname(), os.getpid())

    ctx = zmq.Context()
    io_loop = IOLoop.instance()

    # initialize the logging subsystem
    log_pub = ctx.socket(zmq.PUB)
    log_pub.connect(settings.ZEROMQ_LOGGING)
    zmq_logging_handler = PUBHandler(log_pub)
    zmq_logging_handler.root_topic = "spyder.worker"
    logger = logging.getLogger()
    logger.addHandler(zmq_logging_handler)
    logger.setLevel(settings.LOG_LEVEL_WORKER)

    logger.info("process::Starting up another worker")

    mgmt = create_worker_management(settings, ctx, io_loop)

    logger.debug("process::Initializing fetcher, extractor and scoper")

    fetcher = create_worker_fetcher(settings, mgmt, ctx, zmq_logging_handler,
        io_loop)
    fetcher.start()
    extractor = create_worker_extractor(settings, mgmt, ctx,
        zmq_logging_handler, io_loop)
    extractor.start()

    def quit_worker(raw_msg):
        """
        When the worker should quit, stop the io_loop after 2 seconds.
        """
        msg = MgmtMessage(raw_msg)

        if ZMQ_SPYDER_MGMT_WORKER_QUIT == msg.data:
            logger.info("process::We have been asked to shutdown, do so")
            DelayedCallback(io_loop.stop, 2000, io_loop).start()
            # acknowledge the request, tagged with this worker's identity
            ack = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                identity=identity, data=ZMQ_SPYDER_MGMT_WORKER_QUIT_ACK)
            mgmt._out_stream.send_multipart(ack.serialize())

    mgmt.add_callback(ZMQ_SPYDER_MGMT_WORKER, quit_worker)
    mgmt.start()

    # notify the master that we are online
    msg = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER, identity=identity,
        data=ZMQ_SPYDER_MGMT_WORKER_AVAIL)
    mgmt._out_stream.send_multipart(msg.serialize())

    def handle_shutdown_signal(_sig, _frame):
        """
        Called from the os when a shutdown signal is fired.
        """
        msg = MgmtMessage(data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
        quit_worker(msg.serialize())
        # zmq 2.1 stops blocking calls, restart the ioloop
        io_loop.start()

    # handle kill signals
    signal.signal(signal.SIGINT, handle_shutdown_signal)
    signal.signal(signal.SIGTERM, handle_shutdown_signal)

    logger.info("process::waiting for action")
    # this will block until the worker quits
    try:
        io_loop.start()
    except ZMQError:
        logger.debug("Caught a ZMQError. Hopefully during shutdown")
        logger.debug(traceback.format_exc())

    for mod in [fetcher, extractor, mgmt]:
        mod.close()

    logger.info("process::Houston: Worker down")

    # The logging socket was created from `ctx` as well. `ctx.term()` blocks
    # until every socket of the context is closed, so detach the handler
    # (nothing may log to a closed socket) and close the socket first.
    logger.removeHandler(zmq_logging_handler)
    log_pub.close()

    ctx.term()