class WorkerRep(object):
    """Broker-side bookkeeping for a single attached worker.

    Tracks the worker's liveness credits and owns the periodic timer
    that pushes outgoing heartbeats to it.

    :param proto: the worker protocol id.
    :type proto: str
    :param wid: the worker id.
    :type wid: str
    :param service: service this worker serves
    :type service: str
    :param stream: the ZMQStream used to send messages
    :type stream: ZMQStream
    """

    def __init__(self, proto, wid, service, stream):
        self.proto = proto
        self.id = wid
        self.service = service
        self.stream = stream
        self.last_hb = 0
        self.curr_liveness = HB_LIVENESS
        # outgoing heartbeat timer, fires every HB_INTERVAL ms
        self.hb_out_timer = PeriodicCallback(self.send_hb, HB_INTERVAL)
        self.hb_out_timer.start()

    def send_hb(self):
        """Spend one liveness credit and push a heartbeat to the worker."""
        self.curr_liveness -= 1
        self.stream.send_multipart([self.id, b'', self.proto, b'\x04'])

    def on_heartbeat(self):
        """Refill liveness: the worker just proved it is alive."""
        self.curr_liveness = HB_LIVENESS

    def is_alive(self):
        """Return True while at least one liveness credit remains."""
        return self.curr_liveness > 0

    def shutdown(self):
        """Stop the heartbeat timer and drop the stream reference."""
        self.hb_out_timer.stop()
        self.hb_out_timer = None
        self.stream = None
class WorkerRep(object):
    """Broker-side record of one connected worker.

    Keeps liveness accounting for the worker, remembers which multicast
    groups it joined, and drives the periodic outgoing heartbeat.

    :type wid: str
    :param wid: the worker id.
    :param service: service this worker serves
    :type service: str
    :param stream: the ZMQStream used to send messages
    :type stream: ZMQStream
    """

    def __init__(self, wid, service, stream):
        self.id = wid
        self.service = service
        self.multicasts = []          # multicast groups this worker subscribed to
        self.stream = stream
        self.last_hb = 0
        self.curr_liveness = HB_LIVENESS
        # outgoing heartbeat timer, fires every HB_INTERVAL ms
        self.hb_out_timer = PeriodicCallback(self.send_hb, HB_INTERVAL)
        self.hb_out_timer.start()

    def send_hb(self):
        """Send one heartbeat frame to the worker, spending a liveness credit."""
        self.curr_liveness -= 1
        frames = [self.id, b'', MDP_WORKER_VERSION, b'\x05']
        self.stream.send_multipart(frames)

    def on_heartbeat(self):
        """The worker answered: refill its liveness credits."""
        self.curr_liveness = HB_LIVENESS

    def is_alive(self):
        """Return True while the worker still has liveness credits left."""
        return self.curr_liveness > 0

    def shutdown(self):
        """Stop heartbeating and release the stream reference."""
        self.hb_out_timer.stop()
        self.hb_out_timer = None
        self.stream = None
class DeviceRep(object):
    """Worker-side view of a single device, with liveness tracking."""

    def __init__(self, device_id, state='unknown'):
        self.id = device_id
        self.state = state
        self.curr_liveness = CLIENT_HB_LIVENESS
        # countdown timer: each tick without traffic costs one credit
        self.hb_timer = PeriodicCallback(self.heartbeat, CLIENT_HB_INTERVAL)
        self.hb_timer.start()

    def heartbeat(self):
        """Periodic tick: burn one liveness credit; mark dead at zero."""
        if self.curr_liveness > 0:
            self.curr_liveness = self.curr_liveness - 1
        if self.curr_liveness == 0:
            self.state = 'dead'

    def on_message_received(self):
        """Any traffic from the device refills its liveness credits."""
        self.curr_liveness = CLIENT_HB_LIVENESS

    def is_alive(self):
        """Return True while liveness credits remain."""
        return self.curr_liveness > 0

    def get_state(self):
        """Return the last known device state string."""
        return self.state

    def shutdown(self):
        """Stop and drop the heartbeat timer."""
        self.hb_timer.stop()
        self.hb_timer = None
class TaskState (object):
    """ Tracks task state (with help of watchdog) """

    log = skytools.getLogger ('d:TaskState')

    def __init__ (self, uid, name, info, ioloop, cc, xtx):
        self.uid = uid
        self.name = name
        self.info = info
        self.pidfile = info['config']['pidfile']
        self.ioloop = ioloop
        self.cc = cc
        self.xtx = xtx
        self.timer = None
        self.timer_tick = 1         # watchdog poll period, seconds
        self.heartbeat = False      # when True, report 'running' on each live tick
        self.start_time = None
        self.dead_since = None

    def start (self):
        """Record start time and begin the periodic watchdog poll."""
        self.start_time = time.time()
        self.timer = PeriodicCallback (self.watchdog, self.timer_tick * 1000, self.ioloop)
        self.timer.start()

    def stop (self):
        """Ask the task to stop by signalling its pidfile with SIGINT."""
        try:
            self.log.info ('Signalling %s', self.name)
            skytools.signal_pidfile (self.pidfile, signal.SIGINT)
        except Exception:
            # FIX: was a bare except, which also swallowed
            # KeyboardInterrupt/SystemExit; keep it best-effort but narrower.
            self.log.exception ('signal_pidfile failed: %s', self.pidfile)

    def watchdog (self):
        """Periodic liveness poll: report 'running' while alive, 'stopped' once gone."""
        live = skytools.signal_pidfile (self.pidfile, 0)
        if live:
            self.log.debug ('%s is alive', self.name)
            if self.heartbeat:
                self.send_reply ('running')
        else:
            self.log.info ('%s is over', self.name)
            self.dead_since = time.time()
            self.timer.stop()
            self.timer = None
            self.send_reply ('stopped')

    def ccpublish (self, msg):
        """Wrap a TaskReplyMessage into a cmsg and send it to the CC socket."""
        assert isinstance (msg, TaskReplyMessage)
        cmsg = self.xtx.create_cmsg (msg)
        cmsg.send_to (self.cc)

    def send_reply (self, status, feedback = None):
        """Publish task status with optional feedback payload.

        FIX: feedback default was a shared mutable {}; use None sentinel.
        """
        if feedback is None:
            feedback = {}
        msg = TaskReplyMessage(
                req = 'task.reply.%s' % self.uid,
                handler = self.info['task']['task_handler'],
                task_id = self.info['task']['task_id'],
                status = status,
                feedback = feedback)
        self.ccpublish (msg)
class TaskState(object):
    """ Tracks task state (with help of watchdog) """

    log = skytools.getLogger('d:TaskState')

    def __init__(self, uid, name, info, ioloop, cc, xtx):
        self.uid = uid
        self.name = name
        self.info = info
        self.pidfile = info['config']['pidfile']
        self.ioloop = ioloop
        self.cc = cc
        self.xtx = xtx
        self.timer = None
        self.timer_tick = 1         # watchdog poll period, seconds
        self.heartbeat = False      # when True, report 'running' on each live tick
        self.start_time = None
        self.dead_since = None

    def start(self):
        """Record start time and begin the periodic watchdog poll."""
        self.start_time = time.time()
        self.timer = PeriodicCallback(self.watchdog, self.timer_tick * 1000, self.ioloop)
        self.timer.start()

    def stop(self):
        """Ask the task to stop by signalling its pidfile with SIGINT."""
        try:
            self.log.info('Signalling %s', self.name)
            skytools.signal_pidfile(self.pidfile, signal.SIGINT)
        except Exception:
            # FIX: was a bare except, which also swallowed
            # KeyboardInterrupt/SystemExit; keep it best-effort but narrower.
            self.log.exception('signal_pidfile failed: %s', self.pidfile)

    def watchdog(self):
        """Periodic liveness poll: report 'running' while alive, 'stopped' once gone."""
        live = skytools.signal_pidfile(self.pidfile, 0)
        if live:
            self.log.debug('%s is alive', self.name)
            if self.heartbeat:
                self.send_reply('running')
        else:
            self.log.info('%s is over', self.name)
            self.dead_since = time.time()
            self.timer.stop()
            self.timer = None
            self.send_reply('stopped')

    def ccpublish(self, msg):
        """Wrap a TaskReplyMessage into a cmsg and send it to the CC socket."""
        assert isinstance(msg, TaskReplyMessage)
        cmsg = self.xtx.create_cmsg(msg)
        cmsg.send_to(self.cc)

    def send_reply(self, status, feedback=None):
        """Publish task status with optional feedback payload.

        FIX: feedback default was a shared mutable {}; use None sentinel.
        """
        if feedback is None:
            feedback = {}
        msg = TaskReplyMessage(req='task.reply.%s' % self.uid,
                               handler=self.info['task']['task_handler'],
                               task_id=self.info['task']['task_id'],
                               status=status,
                               feedback=feedback)
        self.ccpublish(msg)
class AppClient(object):
    """DEALER-socket demo client that pings the server once per second."""

    def __init__(self):
        self.ctx = zmq.Context()
        self.loop = IOLoop.instance()
        self.endpoint = "tcp://127.0.0.1:5556"
        self.client = self.ctx.socket(zmq.DEALER)
        # Without linger and timeouts you might have problems when closing context
        self.client.setsockopt(zmq.LINGER, 0)
        self.client.setsockopt(zmq.RCVTIMEO, 5000)  # 5s
        self.client.setsockopt(zmq.SNDTIMEO, 5000)
        print("Connecting to", self.endpoint)
        self.client.connect(self.endpoint)
        self.client = ZMQStream(self.client)
        self.client.on_recv(self.on_recv)
        self.periodic = PeriodicCallback(self.periodictask, 1000)
        self.last_recv = None

    def disconnect(self):
        """Stop the timer and tear down socket + context (best effort)."""
        if self.ctx is None:
            return
        try:
            self.periodic.stop()
            print("Closing socket and context")
            self.client.close()
            self.ctx.term()
        except Exception as e:
            print(e)

    def periodictask(self):
        """Once per second: ping the server if nothing arrived for 5s."""
        if self.client is None:
            return
        quiet = (not self.last_recv
                 or self.last_recv + timedelta(seconds=5) < datetime.utcnow())
        if quiet:
            print("No data from remote (5s)... [ping]")
            print("Sending HELLO to server")
            hello = HelloMessage()
            hello.send(self.client)

    def start(self):
        """Run the IO loop until interrupted, then clean up."""
        try:
            self.periodic.start()
            self.loop.start()
            # NOTE(review): only reached once the loop has stopped
            hello = HelloMessage()
            hello.send(self.client)
        except KeyboardInterrupt:
            print("\n\nCtrl+C detected\n")
        except Exception as E:
            print("Error detected")
            print(str(E))
        finally:
            self.disconnect()

    def on_recv(self, msg):
        """Record the receive time and report the message type."""
        self.last_recv = datetime.utcnow()
        print("Received a message of type %s from server!" % msg[0])
class AppServer(object):
    """ROUTER-socket demo server tracking connected clients by identity."""

    def __init__(self):
        self.listen = "127.0.0.1"
        self.port = 5556
        self.ctx = zmq.Context()
        self.loop = IOLoop.instance()
        self.client_identities = {}
        self.server = self.ctx.socket(zmq.ROUTER)
        # Without linger and timeouts you might have problems when closing context
        self.server.setsockopt(zmq.LINGER, 0)
        self.server.setsockopt(zmq.RCVTIMEO, 5000)  # 5s
        self.server.setsockopt(zmq.SNDTIMEO, 5000)
        bind_addr = "tcp://%s:%s" % (self.listen, self.port)
        self.server.bind(bind_addr)
        print("Server listening for new client connections at", bind_addr)
        self.server = ZMQStream(self.server)
        self.server.on_recv(self.on_recv)
        self.periodic = PeriodicCallback(self.periodictask, 1000)

    def start(self):
        """Start the heartbeat timer and run the IO loop until Ctrl+C."""
        self.periodic.start()
        try:
            self.loop.start()
        except KeyboardInterrupt:
            self.periodic.stop()
            print("\nClosing socket and context\n")
            self.server.close()
            self.ctx.term()

    def periodictask(self):
        """Ping fresh clients; drop those silent for more than 10 seconds."""
        stale_clients = []
        for client_id, last_seen in self.client_identities.items():
            if last_seen + timedelta(seconds=10) < datetime.utcnow():
                stale_clients.append(client_id)
            else:
                HelloMessage().send(self.server, client_id)
        for client_id in stale_clients:
            print(
                "\nHaven't received a HELO from client %s recently. Dropping from list of connected clients."
                % client_id)
            del self.client_identities[client_id]
        sys.stdout.write(".")
        sys.stdout.flush()

    def on_recv(self, msg):
        """Remember the sender (frame 0) and show the message type (frame 1)."""
        identity = msg[0]  # client id (socket handle), used to reply back
        self.client_identities[identity] = datetime.utcnow()
        msg_type = msg[1]
        print("Received message of type %s from client ID %s!" % (msg_type, identity))
class DemoApp(ZMQProcess):
    # NOTE(review): Python 2 print statements below -- this class targets
    # Python 2 and should not be run under Python 3 as-is.

    def __init__ (self):
        super(DemoApp, self).__init__()

    def setup(self):
        # Create a local PUB socket and a SUB socket listening to it
        # (handled by DemoHandler), then arm a 1 s heartbeat that publishes
        # pings.  The heartbeat is not started here -- see local_run().
        super(DemoApp, self).setup()
        self.pub, self.pub_addr = self.stream(zmq.PUB, 'tcp://127.0.0.1:%(port)s', True)
        self.sub, sub_addr = self.stream(zmq.SUB, self.pub_addr, False, callback=DemoHandler())
        self.heartbeat = PeriodicCallback(self.ping, 1000, self.loop)

    def ping(self):
        # Publish a ['ping', timestamp] payload on the 'ping' topic.
        print 'SEND PING'
        self.pub.send_multipart(['ping', json.dumps(['ping', time.time()])])

    def local_run(self):
        # Start the heartbeat timer once the process-local loop is running.
        print 'START HEARTBEAT'
        self.heartbeat.start()

    def stop(self):
        # Stop the heartbeat before stopping the IO loop.
        self.heartbeat.stop()
        self.loop.stop()
class LogWatch_HandleStats (PgLogForwardPlugin):
    """ Collect per-client call statistics from Postgres log traffic and
    periodically dump them to a stats queue.
    """
    LOG_FORMATS = ['netstr']

    def init (self, log_fmt):
        super(LogWatch_HandleStats, self).init(log_fmt)

        # depends on pg_settings.log_function_calls
        self.parse_statements = self.cf.getbool ('parse_statements', True)
        self.msg_suffix = self.cf.get ('msg-suffix', 'confdb')
        if self.msg_suffix and not is_msg_req_valid (self.msg_suffix):
            self.log.error ("invalid msg-suffix: %s", self.msg_suffix)
            self.msg_suffix = None
        self.hostname = socket.gethostname()
        self.stat_queue_name = self.cf.get ('stat_queue_name', '')
        # FIX: was cf.get(), which yields a string; the value is compared
        # against len(self.client_stats) in _update_stats, so it must be int.
        self.max_stat_items = self.cf.getint ('max_stat_items', 10000)
        self.stat_dump_interval = self.cf.getint ('stat_interval', 3600)
        self.last_stat_dump = time.time()
        self.client_stats = {}

        self.timer = PeriodicCallback (self.save_stats, self.stat_dump_interval * 1000)
        self.timer.start()

    def process_netstr (self, data):
        """ Process contents of collected log chunk.  This might be a SQL
        statement or a connect/disconnect entry.
        """
        if not self.stat_queue_name:
            return
        if data['remotehost'] == "[local]":
            data['remotehost'] = "127.0.0.1"

        action = None
        action_duration = 0
        statement_duration = 0
        call_count = 0

        if data['message'].startswith ("connection authorized:"):
            action = "connect"
        elif data['message'].startswith ("disconnection"):
            action = "disconnect"
            m = rc_disconnect.match (data['message'])
            if m:
                # session length, converted to milliseconds
                action_duration = (int(m.group('hours')) * 3600 +
                                   int(m.group('minutes')) * 60 +
                                   float(m.group('seconds'))) * 1000
        elif not self.parse_statements:
            # we have function logging enabled, see if we can use it
            m = rc_logged_func.search (data['message'])
            if m:
                # a logged function call, definitely prefer this to parsing
                action = m.group('func_name')
                action_duration = float(m.group('time')) / 1000
                call_count = int(m.group('calls'))

        if not action:
            # we have to parse function call
            m = rc_sql.search (data['message'])
            if m:
                if self.parse_statements:
                    # attempt to parse the function name and parameters
                    #action = self.get_sql_action (m.group('sql'))
                    call_count = 1
                # count the overall statement duration
                action_duration = float(m.group('duration'))
                statement_duration = action_duration

        self._update_stats (data, action, action_duration, call_count)
        self._update_stats (data, "SQL statements", statement_duration, call_count)

    def _update_stats (self, data, action, duration, call_count):
        """ Accumulate one sample into the per-(db, user, host, action) bucket,
        creating the bucket unless the item cap is exceeded.
        """
        if action:
            key = (data['database'], data['username'], data['remotehost'], action)
            cs = self.client_stats.get(key)
            if cs:
                cs.update (duration, call_count)
            elif len(self.client_stats) > self.max_stat_items:
                self.log.error ("Max stat items exceeded: %i", self.max_stat_items)
            else:
                cs = ClientStats (data['database'], data['username'],
                                  data['remotehost'], action, duration, call_count)
                self.client_stats[key] = cs

    def save_stats (self):
        """ Dump client stats to database.  Scheduled to be called periodically. """
        # do not send stats if stats is missing or stats queue is missing
        if not self.client_stats or not self.stat_queue_name:
            return

        now = time.time()
        time_passed = now - self.last_stat_dump
        self.log.info ("Sending usage stats to repository [%i]", len(self.client_stats))

        # post role usage
        usage = []
        for client in self.client_stats.values():
            self.log.trace ("client: %s", client)
            usage.append (client.to_dict())
        params = skytools.db_urlencode(dict(
            hostname = self.hostname,
            sample_length = '%d seconds' % time_passed,
            snap_time = datetime.datetime.now().isoformat()))
        confdb_funcargs = ('username=discovery', params, skytools.make_record_array(usage))
        funcargs = [None, self.stat_queue_name, 'dba.set_role_usage',
                    skytools.db_urlencode(dict(enumerate(confdb_funcargs)))]
        msg = DatabaseMessage (function = 'pgq.insert_event', params = cc.json.dumps(funcargs))
        if self.msg_suffix:
            msg.req += '.' + self.msg_suffix
        self.main.ccpublish(msg)

        self.client_stats = {}
        self.last_stat_dump = now

    def stop (self):
        """Stop the periodic stats dump timer."""
        self.timer.stop()
class WorkerRep(object):
    """Broker-side representation of one attached worker.

    Holds liveness state for the worker, announces the worker's unique id
    on creation, and owns the timer producing outgoing heartbeats.

    :param proto: the worker protocol id.
    :type proto: str
    :param wid: the worker id.
    :type wid: str
    :param service: service this worker serves
    :type service: str
    :param stream: the ZMQStream used to send messages
    :type stream: ZMQStream
    """

    def __init__(self, proto, wid, service, stream):
        self.proto = proto
        self.id = wid
        self.service = service
        self.stream = stream
        self.curr_liveness = HB_LIVENESS
        # tell the worker its unique id, then start heartbeating
        self.send_uniqueid()
        self.hb_out_timer = PeriodicCallback(self.send_hb, HB_INTERVAL)
        self.hb_out_timer.start()

    def send_uniqueid(self):
        """Called on W_READY from worker.

        Sends unique id to worker.
        """
        logging.debug('Broker to Worker {} sending unique id: {}'.format(
            self.service, self.id))
        frames = [self.id, EMPTY_FRAME, self.proto, W_READY, self.id]
        self.stream.send_multipart(frames)

    def send_hb(self):
        """Spend one liveness credit and push a heartbeat to the worker."""
        self.curr_liveness -= 1
        logging.debug('Broker to Worker {} HB tick, current liveness: {}'.format(
            self.service, self.curr_liveness))
        frames = [self.id, EMPTY_FRAME, self.proto, W_HEARTBEAT]
        self.stream.send_multipart(frames)

    def on_heartbeat(self):
        """Refill liveness after hearing a heartbeat from the worker."""
        logging.debug('Received HB from worker {}'.format(self.service))
        self.curr_liveness = HB_LIVENESS

    def is_alive(self):
        """Return True while liveness credits remain."""
        return self.curr_liveness > 0

    def shutdown(self):
        """Stop the heartbeat timer and drop the stream reference."""
        logging.info('Shuting down worker {}'.format(self.service))
        self.hb_out_timer.stop()
        self.hb_out_timer = None
        self.stream = None
class Worker(object):
    """Class for the MDP worker side.

    Thin encapsulation of a zmq.DEALER socket.
    Provides a send method with optional timeout parameter.

    Will use a timeout to indicate a broker failure.
    """

    max_forks = 10
    ipc = 'ipc:///tmp/zmq-rpc-' + str(uuid4())

    HB_INTERVAL = 1000  # in milliseconds
    HB_LIVENESS = 3  # HBs to miss before connection counts as dead

    def __init__(self, context, endpoint, service, multicasts=()):
        """Initialize the MDPWorker.

        :param context: is the zmq context to create the socket from
        :type context: zmq.Context
        :param endpoint: the broker endpoint to connect to
        :type endpoint: str
        :param service: service name - you can put hostname here
        :type service: str
        :param multicasts: list of groups to subscribe
        :type multicasts: list
        """
        self.context = context
        self.endpoint = endpoint
        self.service = service.encode('utf-8')  # convert to byte-string - required in python 3
        self.multicasts = [m.encode('utf-8') for m in multicasts]  # convert to byte-string
        self.stream = None
        self._tmo = None
        self.need_handshake = True
        self.ticker = None
        self._delayed_cb = None
        self._create_stream()
        self.forks = []
        self.curr_liveness = self.HB_LIVENESS
        # local ROUTER socket where forked workers deliver their replies
        socket = self.context.socket(zmq.ROUTER)
        socket.bind(self.ipc)
        self.stream_w = ZMQStream(socket)
        self.stream_w.on_recv(self._on_fork_response)
        self.reply_socket = None
        return

    def _create_stream(self):
        """Helper to create the socket and the stream.
        """
        self.on_log_event('broker.connect', 'Trying to connect do broker')
        socket = self.context.socket(zmq.DEALER)
        ioloop = IOLoop.instance()
        self.stream = ZMQStream(socket, ioloop)
        self.stream.on_recv(self._on_message)
        self.stream.socket.setsockopt(zmq.LINGER, 0)
        self.stream.connect(self.endpoint)
        self.ticker = PeriodicCallback(self._tick, self.HB_INTERVAL)
        self._send_ready()
        for m in self.multicasts:
            self._register_worker_to_multicast(m)
        self.ticker.start()
        return

    def _tick(self):
        """Method called every HB_INTERVAL milliseconds.

        Burns one liveness credit and sends a heartbeat; when credits run
        out, tears the connection down and schedules a reconnect.
        """
        self.curr_liveness -= 1
        self.send_hb()
        if self.curr_liveness >= 0:
            return
        # ouch, connection seems to be dead
        self.on_log_event('broker.timeout', 'Connection to broker timeouted, disconnecting')
        self.shutdown(False)
        # try to recreate it
        self._delayed_cb = DelayedCallback(self._create_stream, 5000)
        self._delayed_cb.start()
        return

    def send_hb(self):
        """Construct and send HB message to broker.
        """
        msg = [b'', MDP_WORKER_VERSION, b'\x05']
        self.stream.send_multipart(msg)
        return

    def shutdown(self, final=True):
        """Method to deactivate the worker connection completely.

        Will delete the stream and the underlying socket.

        :param final: if shutdown is final and we want to close all sockets
        :type final: bool
        """
        if self.ticker:
            self.ticker.stop()
            self.ticker = None
        if not self.stream:
            return
        self.stream.on_recv(None)
        self.disconnect()
        self.stream.socket.close()
        self.stream.close()
        self.stream = None
        self.need_handshake = True
        if final:
            self.stream_w.socket.close()
            self.stream_w.close()
            # FIX: originally cleared self.stream (already None above)
            # instead of dropping the reference to the closed local stream.
            self.stream_w = None
        return

    def disconnect(self):
        """Helper method to send the workers DISCONNECT message.
        """
        self.stream.socket.send_multipart([b'', MDP_WORKER_VERSION, b'\x06'])
        self.curr_liveness = self.HB_LIVENESS
        return

    def _send_ready(self):
        """Helper method to prepare and send the workers READY message.
        """
        self.on_log_event('broker.ready', 'Sending ready to broker.')
        ready_msg = [b'', MDP_WORKER_VERSION, b'\x01', self.service]
        self.stream.send_multipart(ready_msg)
        self.curr_liveness = self.HB_LIVENESS
        return

    def _register_worker_to_multicast(self, name):
        """Helper method to register worker to multicast group

        :param name: group name
        :type name: str
        """
        self.on_log_event('broker.register-group', 'Subscribing to group \'%s\'.' % name)
        reg_msg = [b'', MDP_WORKER_VERSION, b'\x07', name]
        self.stream.send_multipart(reg_msg)
        self.curr_liveness = self.HB_LIVENESS
        return

    def _on_message(self, msg):
        """Helper method called on message receive.

        :param msg: message parts
        :type msg: list
        """
        # 1st part is empty
        msg.pop(0)
        # 2nd part is protocol version
        protocol_version = msg.pop(0)
        if protocol_version != MDP_WORKER_VERSION:  # version check, ignore old versions
            return
        # 3rd part is message type
        msg_type = msg.pop(0)
        # any message resets the liveness counter
        self.need_handshake = False
        self.curr_liveness = self.HB_LIVENESS
        if msg_type == b'\x06':  # disconnect
            self.curr_liveness = 0  # reconnect will be triggered by hb timer
        elif msg_type == b'\x02':  # request
            # remaining parts are the user message
            addresses, msg = self.split_address(msg)
            self._on_request(addresses, msg)
        elif msg_type == b'\x05':
            # received hardbeat - timer handled above
            pass
        else:
            # invalid message ignored
            pass
        return

    def _on_fork_response(self, to_send):
        """Helper method to send message from forked worker.

        This message will be received by main worker process and resend
        to broker.

        :param to_send: address and data to send
        :type to_send: list
        """
        self.stream.send_multipart(to_send)
        return

    def send_reply(self, addresses, msg, partial=False, exception=False):
        """Send reply from forked worker process.

        This method can be called only from do_work() method!
        This method will send messages to main worker listening on local
        socket in /tmp/zmq-rpc-...

        :param addresses: return address stack
        :type addresses: list of str
        :param msg: return value from called method
        :type msg: mixed
        :param partial: if the message is partial or final
        :type partial: bool
        :param exception: if the message is exception, msg format is:
            {'class':'c', 'message':'m', 'traceback':'t'}
        :type exception: bool
        """
        if not self.reply_socket:
            context = zmq.Context()
            self.reply_socket = context.socket(zmq.DEALER)
            self.reply_socket.connect(self.ipc)
        msg = msgpack.Packer().pack(msg)
        if exception:
            to_send = [b'', MDP_WORKER_VERSION, b'\x08']
        elif partial:
            to_send = [b'', MDP_WORKER_VERSION, b'\x03']
        else:
            to_send = [b'', MDP_WORKER_VERSION, b'\x04']
        to_send.extend(addresses)
        to_send.append(b'')
        if isinstance(msg, list):
            to_send.extend(msg)
        else:
            to_send.append(msg)
        m = self.reply_socket.send_multipart(to_send, track=True, copy=False)
        m.wait()
        if not partial:
            self.reply_socket.close()
            self.reply_socket = None
        return

    def send_message(self, addresses, msg, partial=False, error=False):
        """Send response message from main worker process.

        Please do not call this method from do_work()

        :param addresses: return address stack
        :type addresses: list of str
        :param msg: return value from called method
        :type msg: mixed
        :param partial: if the message is partial or final
        :type partial: bool
        :param error: if the message is error
        :type error: bool
        """
        to_send = [b'', MDP_WORKER_VERSION]
        if partial:
            to_send.append(b'\x03')
        elif error:
            to_send.append(b'\x09')
        else:
            to_send.append(b'\x04')
        to_send.extend(addresses)
        to_send.append(b'')
        if isinstance(msg, list):
            to_send.extend(msg)
        else:
            to_send.append(msg)
        self.stream.send_multipart(to_send)
        return

    def _on_request(self, addresses, message):
        """Helper method called on RPC message receive.
        """
        # remove finished forks
        self._remove_finished_processes()
        # test max forks
        if len(self.forks) >= self.max_forks:
            self.send_message(addresses, b'max workers limit exceeded', error=True)
            self.on_max_forks(addresses, message)
            return
        name = message[0]
        args = msgpack.unpackb(message[1])
        kwargs = msgpack.unpackb(message[2])
        p = Process(target=self.do_work, args=(addresses, name, args, kwargs))
        p.start()
        p._args = None  # free memory
        self.forks.append(p)
        return

    def _remove_finished_processes(self):
        """Helper method dedicated to cleaning list of forked workers
        """
        for f in [f for f in self.forks if not f.is_alive()]:
            self.forks.remove(f)
        return

    def split_address(self, msg):
        """Function to split return Id and message received by ROUTER socket.

        Returns 2-tuple with return Id and remaining message parts.
        Empty frames after the Id are stripped.
        """
        ret_ids = []
        i = 0  # FIX: avoid NameError when msg is empty
        for i, p in enumerate(msg):
            if p:
                ret_ids.append(p)
            else:
                break
        return ret_ids, msg[i + 1:]

    def on_log_event(self, event, message):
        """Override this method if you want to log events from broker

        :type event: str
        :param event: event type - used for filtering
        :type message: str
        :param message: log message
        :rtype: None
        """
        pass

    def on_max_forks(self, addresses, message):
        """This method is called when max_forks limit is reached

        You can override this method.
        """
        pass

    def do_work(self, addresses, name, args, kwargs):
        """Main method responsible for handling rpc calls, and sending
        response messages.

        Please override this method!

        :param addresses: return address stack
        :type addresses: list of str
        :param name: name of task
        :type name: str
        :param args: positional task arguments
        :type args: list
        :param kwargs: key-value task arguments
        :type kwargs: dict
        """
        # this is example of simple response message
        self.send_reply(addresses, 'method not implemented')
        # and send message to main worker
        # you can also send partial message and exception - read 'send_reply' docs
        return
class MQRep(object):
    """Class for the MDP worker side.

    Thin encapsulation of a zmq.DEALER socket.
    Provides a send method with optional timeout parameter.

    Will use a timeout to indicate a broker failure.
    """

    _proto_version = b'MDPW01'

    # TODO: integrate that into API
    HB_INTERVAL = 1000  # in milliseconds
    HB_LIVENESS = 3  # HBs to miss before connection counts as dead

    def __init__(self, context, service):
        """Initialize the MDPWorker.

        context is the zmq context to create the socket from.
        service is a byte-string with the service name.
        """
        if DEBUG:
            print("MQRep > __init__")
        # broker endpoint is read from the 'mq' config section
        cfg = Loader('mq').load()
        config = dict(cfg[1])
        if config['ip'].strip() == "*":
            config['ip'] = get_ip()
        self.endpoint = "tcp://{0}:{1}".format(config['ip'], config['req_rep_port'])
        self.context = context
        self.service = service
        self.stream = None
        self._tmo = None
        self.need_handshake = True
        self.ticker = None
        self._delayed_cb = None
        self._create_stream()
        ### patch fritz
        self._reconnect_in_progress = False
        ### end patch fritz
        return

    def _create_stream(self):
        """Helper to create the socket and the stream.

        Also (re)starts the heartbeat ticker and sends READY; called both
        at startup and on reconnect after a timeout.
        """
        if DEBUG:
            print("MQRep > _create_stream")
        socket = ZmqSocket(self.context, zmq.DEALER)
        ioloop = IOLoop.instance()
        self.stream = ZMQStream(socket, ioloop)
        self.stream.on_recv(self._on_mpd_message)
        self.stream.socket.setsockopt(zmq.LINGER, 0)
        self.stream.connect(self.endpoint)
        # stop a leftover ticker before arming a fresh one (reconnect path)
        if self.ticker != None:
            if DEBUG:
                print("MQRep > _create_stream - stop ticker")
            self.ticker.stop()
        self.ticker = PeriodicCallback(self._tick, self.HB_INTERVAL)
        self._send_ready()
        self.ticker.start()
        return

    def _send_ready(self):
        """Helper method to prepare and send the workers READY message.
        """
        if DEBUG:
            print("MQREP > _send_ready")
        ready_msg = [ b'', self._proto_version, b'\x01', self.service ]
        self.stream.send_multipart(ready_msg)
        self.curr_liveness = self.HB_LIVENESS
        if DEBUG:
            print("MQREP > _send_ready > curr_liveness <= {0}".format(self.HB_LIVENESS))
        return

    def _tick(self):
        """Method called every HB_INTERVAL milliseconds.

        Spends one liveness credit and heartbeats the broker; when credits
        run out, tears the stream down and schedules a delayed reconnect.
        """
        if DEBUG:
            print("MQREP > _tick")
        self.curr_liveness -= 1
        if DEBUG:
            print('MQREP > _tick - {0} tick = {1}'.format(time.time(), self.curr_liveness))
        self.send_hb()
        if self.curr_liveness >= 0:
            return
        if DEBUG:
            print('MQREP > _tick - {0} lost connection'.format(time.time()))
        # ouch, connection seems to be dead
        self.shutdown()
        # try to recreate it
        self._delayed_cb = DelayedCallback(self._create_stream, self.HB_INTERVAL)
        self._delayed_cb.start()
        return

    def send_hb(self):
        """Construct and send HB message to broker.
        """
        msg = [ b'', self._proto_version, b'\x04' ]
        self.stream.send_multipart(msg)
        return

    def shutdown(self):
        """Method to deactivate the worker connection completely.

        Will delete the stream and the underlying socket.
        """
        if self.ticker:
            self.ticker.stop()
            self.ticker = None
        if not self.stream:
            return
        self.stream.socket.close()
        self.stream.close()
        self.stream = None
        self.timed_out = False
        self.need_handshake = True
        self.connected = False
        return

    def reply(self, msg):
        """Send the given message.

        msg can either be a byte-string or a list of byte-strings.

        Uses (and consumes) the envelope saved by _on_mpd_message for the
        request currently being answered.
        """
        ##        if self.need_handshake:
        ##            raise ConnectionNotReadyError()
        # prepare full message
        to_send = self.envelope
        self.envelope = None
        if isinstance(msg, list):
            to_send.extend(msg)
        else:
            to_send.append(msg)
        self.stream.send_multipart(to_send)
        return

    def _on_mpd_message(self, msg):
        """Helper method called on message receive.

        msg is a list w/ the message parts
        """
        if DEBUG:
            print("MQRep > _on_mpd_message : {0} - {1}".format(time.strftime("%H:%M:%S"), msg))
        # 1st part is empty
        msg.pop(0)
        # 2nd part is protocol version
        # TODO: version check
        proto = msg.pop(0)
        # 3rd part is message type
        msg_type = msg.pop(0)
        # XXX: hardcoded message types!
        # any message resets the liveness counter
        self.need_handshake = False
        self.curr_liveness = self.HB_LIVENESS
        if DEBUG:
            print("MQREP > _on_mpd_message > curr_liveness <= {0}".format(self.HB_LIVENESS))
        if msg_type == b'\x05':  # disconnect
            if DEBUG:
                print("MQREP > _on_mpd_message > type x05 : disconnect")
            self.curr_liveness = 0  # reconnect will be triggered by hb timer
        elif msg_type == b'\x02':  # request
            if DEBUG:
                print("MQREP > _on_mpd_message > type x02 : request")
            # remaining parts are the user message; save the REPLY envelope
            # for reply() to use later
            envelope, msg = split_address(msg)
            envelope.append(b'')
            envelope = [ b'', self._proto_version, b'\x03'] + envelope  # REPLY
            self.envelope = envelope
            mes = MQMessage()
            mes.set(msg)
            #print("MQRep > before self.on_mdp_request")
            #print(self.on_mdp_request)
            #print(mes)
            try:
                self.on_mdp_request(mes)
            except:
                # NOTE(review): bare except keeps the IO loop alive when a
                # request handler crashes; the error is printed below.
                print("ERROR {0}".format(traceback.format_exc()))
        else:
            if DEBUG:
                print("MQREP > _on_mpd_message > type ??? : invalid or hbeat")
            # invalid message
            # ignored
            # if \x04, this is a hbeat message
            pass
        return

    def on_mdp_request(self, msg):
        """Public method called when a request arrived.

        Must be overloaded!
        """
        pass
class TaskRouter(CCHandler):
    """Keep track of host routes.

    Clean old ones.
    """

    log = skytools.getLogger('h:TaskRouter')

    CC_ROLES = ['remote']

    def __init__(self, *args):
        super(TaskRouter, self).__init__(*args)
        self.route_map = {}     # host -> HostRoute (ZMQ route to TaskRunner)
        self.reply_map = {}     # task uid -> ReplyRoute (ZMQ route back to client)

        # 1 hr?  XXX
        self.route_lifetime = self.cf.getint ('route-lifetime', 1 * 60 * 60)
        self.reply_timeout = self.cf.getint ('reply-timeout', 5 * 60)
        self.maint_period = self.cf.getint ('maint-period', 1 * 60)

        self.timer = PeriodicCallback(self.do_maint, self.maint_period*1000, self.ioloop)
        self.timer.start()

    def handle_msg(self, cmsg):
        """ Got task from client or reply from TaskRunner / CCTask.

        Dispatch task request to registered TaskRunner.
        Dispatch task reply to requestor (client).
        """
        self.log.trace('got message: %r', cmsg)
        req = cmsg.get_dest()
        sreq = req.split('.')
        if req == 'task.register':
            self.register_host (cmsg)
        elif sreq[:2] == ['task','send']:
            self.send_host (cmsg)
        elif sreq[:2] == ['task','reply']:
            self.send_reply (cmsg)
        else:
            self.log.warning('unknown msg: %s', req)

    def do_maint(self):
        """Drop old routes"""
        # NOTE(review): .itervalues() -- this module targets Python 2.
        self.log.debug('cleanup')
        now = time.time()
        # collect first, delete after: avoids mutating dict while iterating
        zombies = []
        for hr in self.route_map.itervalues():
            if now - hr.create_time > self.route_lifetime:
                zombies.append(hr)
        for hr in zombies:
            self.log.info('deleting route for %s', hr.host)
            del self.route_map[hr.host]
            self.stat_inc('dropped_routes')
        zombies = []
        for rr in self.reply_map.itervalues():
            if now - rr.atime > self.reply_timeout:
                zombies.append(rr)
        for rr in zombies:
            self.log.info('deleting reply route for %s', rr.uid)
            del self.reply_map[rr.uid]
            self.stat_inc('dropped_tasks')

    def register_host (self, cmsg):
        """Remember ZMQ route for host"""
        route = cmsg.get_route()
        msg = cmsg.get_payload(self.xtx)
        if not msg: return
        host = msg.host
        self.log.info('Got registration for %s', host)
        hr = HostRoute (host, route)
        self.route_map[hr.host] = hr
        self.stat_inc ('task.register')
        # FIXME: proper reply?
        #zans = route + [''] + ['OK']
        #self.cclocal.send_multipart(zans)

    def send_host (self, cmsg):
        """Send message for task executor on host"""
        msg = cmsg.get_payload (self.xtx)
        host = msg.task_host
        if host not in self.route_map:
            self.ccerror(cmsg, 'cannot route to %s' % host)
            return
        inr = cmsg.get_route()          # route from/to client
        hr = self.route_map[host]       # find ZMQ route to host
        cmsg.set_route (hr.route)       # re-construct message
        # send the message
        self.log.debug('sending task to %s', host)
        cmsg.send_to (self.cclocal)
        self.stat_inc ('task.send')
        # remember ZMQ route for replies
        req = cmsg.get_dest()
        uid = req.split('.')[2]
        rr = ReplyRoute (uid, inr)
        self.reply_map[uid] = rr
        # send ack to client
        rep = TaskReplyMessage(
                req = 'task.reply.%s' % uid,
                handler = msg['task_handler'],
                task_id = msg['task_id'],
                status = 'forwarded')
        rcm = self.xtx.create_cmsg (rep)
        rcm.set_route (inr)
        rcm.send_to (self.cclocal)
        self.log.debug('saved client for %r', uid)

    def send_reply (self, cmsg):
        """ Send reply message back to task requestor """
        req = cmsg.get_dest()
        uid = req.split('.')[2]
        if uid not in self.reply_map:
            self.log.info ("cannot route back: %s", req)
            return
        self.log.debug ("req: %s", req)
        rr = self.reply_map[uid]        # find ZMQ route
        cmsg.set_route (rr.route)       # re-route message
        cmsg.send_to (self.cclocal)
        rr.atime = time.time()          # update feedback time
        self.stat_inc ('task.reply')

    def ccreply(self, rep, creq):
        """Send a reply back along the requestor's own route."""
        crep = self.xtx.create_cmsg(rep)
        crep.take_route(creq)
        crep.send_to(self.cclocal)

    def ccerror(self, cmsg, errmsg):
        """Log an error and report it back to the requestor."""
        self.log.info(errmsg)
        rep = ErrorMessage(msg = errmsg)
        self.ccreply(rep, cmsg)

    def stop (self):
        """Stop maintenance timer on handler shutdown."""
        super(TaskRouter, self).stop()
        self.log.info ("stopping")
        self.timer.stop()
class Echo (CCHandler):
    """ Echo handler / sender / monitor.

    Answers incoming echo.request messages and, if configured with
    ping-remotes, periodically pings those peers and tracks round-trip
    statistics per peer.
    """

    CC_ROLES = ['local', 'remote']

    log = skytools.getLogger ('h:Echo')

    ping_tick = 1       # seconds between pings
    zmq_hwm = 1         # drop rather than queue stale pings
    zmq_linger = 0

    def __init__ (self, hname, hcf, ccscript):
        super(Echo, self).__init__(hname, hcf, ccscript)

        self.echoes = {}                # echo stats for monitored peers
        self.stream = {}                # connections to monitored peers

        for url in self.cf.getlist ("ping-remotes", ""):
            sock = self._make_socket (url)
            self.stream[url] = CCStream (sock, ccscript.ioloop, qmaxsize = self.zmq_hwm)
            self.stream[url].on_recv (self.on_recv)
            self.echoes[url] = EchoState (url)
            self.log.debug ("will ping %s", url)

        self.timer = PeriodicCallback (self.ping, self.ping_tick * 1000, self.ioloop)
        self.timer.start()

    def _make_socket (self, url):
        """ Create socket for pinging remote CC. """
        sock = self.zctx.socket (zmq.XREQ)
        sock.setsockopt (zmq.HWM, self.zmq_hwm)
        sock.setsockopt (zmq.LINGER, self.zmq_linger)
        sock.connect (url)
        return sock

    def on_recv (self, zmsg):
        """ Got reply from a remote CC, process it. """
        try:
            self.log.trace ("%r", zmsg)
            cmsg = CCMessage (zmsg)
            req = cmsg.get_dest()
            if req == "echo.response":
                self.process_response (cmsg)
            else:
                self.log.warn ("unknown msg: %s", req)
        except:
            # never let a bad peer message kill the ioloop callback
            self.log.exception ("crashed, dropping msg")

    def handle_msg (self, cmsg):
        """ Got a message, process it. """
        self.log.trace ("%r", cmsg)
        req = cmsg.get_dest()
        if req == "echo.request":
            self.process_request (cmsg)
        else:
            self.log.warn ("unknown msg: %s", req)

    def process_request (self, cmsg):
        """ Ping received, respond with pong. """
        msg = cmsg.get_payload (self.xtx)
        if not msg: return
        # echo back the requestor's own data so it can match ping to pong
        rep = EchoResponseMessage(
                orig_hostname = msg['hostname'],
                orig_target = msg['target'],
                orig_time = msg['time'])
        rcm = self.xtx.create_cmsg (rep)
        rcm.take_route (cmsg)
        rcm.send_to (self.cclocal)

    def process_response (self, cmsg):
        """ Pong received, evaluate it. """
        msg = cmsg.get_payload (self.xtx)
        if not msg: return
        url = msg.orig_target
        if url not in self.echoes:
            self.log.warn ("unknown pong: %s", url)
            return
        echo = self.echoes[url]
        echo.update_pong (msg)
        rtt = echo.time_pong - msg.orig_time
        if msg.orig_time == echo.time_ping:
            # pong for the latest ping -> normal round trip
            self.log.trace ("echo time: %f s (%s)", rtt, url)
        elif rtt <= 5 * self.ping_tick:
            self.log.debug ("late pong: %f s (%s)", rtt, url)
        else:
            self.log.info ("too late pong: %f s (%s)", rtt, url)

    def send_request (self, url):
        """ Send ping to remote CC. """
        msg = EchoRequestMessage(
                target = url)
        cmsg = self.xtx.create_cmsg (msg)
        self.stream[url].send_cmsg (cmsg)
        self.echoes[url].update_ping (msg)
        self.log.trace ("%r", msg)

    def ping (self):
        """ Echo requesting and monitoring. """
        self.log.trace ("")
        for url in self.stream:
            echo = self.echoes[url]
            # warn if no pong seen for several ticks, then ping again
            if echo.time_ping - echo.time_pong > 5 * self.ping_tick:
                self.log.warn ("no pong from %s for %f s", url,
                               echo.time_ping - echo.time_pong)
            self.send_request (url)

    def stop (self):
        """Stop ping timer and shut down."""
        super(Echo, self).stop()
        self.log.info ("stopping")
        self.timer.stop()
class TailWriter (CCHandler):
    """ Simply appends to files (with help from workers).

    Accepts file-chunk messages from clients, assigns each distinct file
    to one worker thread, and forwards subsequent chunks to that worker.
    """

    CC_ROLES = ['remote']

    log = skytools.getLogger ('h:TailWriter')

    def __init__ (self, hname, hcf, ccscript):
        super(TailWriter, self).__init__(hname, hcf, ccscript)

        self.files = {}         # file-identity tuple -> FileState
        self.workers = []
        self.wparams = {} # passed to workers

        self.wparams['dstdir'] = self.cf.getfile ('dstdir')
        self.wparams['host_subdirs'] = self.cf.getbool ('host-subdirs', 0)
        self.wparams['maint_period'] = self.cf.getint ('maint-period', 3)
        self.wparams['write_compressed'] = self.cf.get ('write-compressed', '')
        assert self.wparams['write_compressed'] in [None, '', 'no', 'keep', 'yes']
        if self.wparams['write_compressed'] in ('keep', 'yes'):
            self.log.info ("position checking not supported for compressed files")
        if self.wparams['write_compressed'] == 'yes':
            self.wparams['compression'] = self.cf.get ('compression', '')
            if self.wparams['compression'] not in ('gzip', 'bzip2'):
                self.log.error ("unsupported compression: %s", self.wparams['compression'])
            self.wparams['compression_level'] = self.cf.getint ('compression-level', '')
        self.wparams['buf_maxbytes'] = cc.util.hsize_to_bytes (self.cf.get ('buffer-bytes', '1 MB'))
        if self.wparams['buf_maxbytes'] < BUF_MINBYTES:
            self.log.info ("buffer-bytes too low, adjusting: %i -> %i", self.wparams['buf_maxbytes'], BUF_MINBYTES)
            self.wparams['buf_maxbytes'] = BUF_MINBYTES

        # initialise sockets for communication with workers
        self.dealer_stream, self.dealer_url = self.init_comm (zmq.XREQ, 'inproc://workers-dealer', self.dealer_on_recv)
        self.router_stream, self.router_url = self.init_comm (zmq.XREP, 'inproc://workers-router', self.router_on_recv)

        self.launch_workers()

        self.timer_maint = PeriodicCallback (self.do_maint, self.wparams['maint_period'] * 1000, self.ioloop)
        self.timer_maint.start()

    def init_comm (self, stype, url, cb):
        """ Create socket, stream, etc for communication with workers.

        Returns (stream, url-with-port) tuple.
        """
        sock = self.zctx.socket (stype)
        port = sock.bind_to_random_port (url)
        curl = "%s:%d" % (url, port)
        stream = CCStream (sock, self.ioloop)
        stream.on_recv (cb)
        return (stream, curl)

    def launch_workers (self):
        """ Create and start worker threads. """
        nw = self.cf.getint ('worker-threads', 10)
        for i in range (nw):
            wname = "%s.worker-%i" % (self.hname, i)
            self.log.info ("starting %s", wname)
            w = TailWriter_Worker(
                    wname, self.xtx, self.zctx, self.ioloop,
                    self.dealer_url, self.router_url, self.wparams)
            w.stat_inc = self.stat_inc # XXX
            self.workers.append (w)
            w.start()

    def handle_msg (self, cmsg):
        """ Got message from client, process it. """

        data = cmsg.get_payload (self.xtx)
        if not data: return

        host = data['hostname']
        fn = data['filename']
        st_dev = data.get('st_dev')
        st_ino = data.get('st_ino')
        # identity includes dev/inode so a rotated file counts as new
        fi = (host, st_dev, st_ino, fn)

        if fi in self.files:
            fd = self.files[fi]
            if fd.waddr: # already accepted ?
                self.log.trace ("passing %r to %s", fn, fd.wname)
                fd.queue.append (cmsg)
                fd.send_to (self.router_stream)
            else:
                # worker not yet known -- queue until router_on_recv
                self.log.trace ("queueing %r", fn)
                fd.queue.append (cmsg)
        else:
            # new file -- offer it to any idle worker via dealer socket
            fd = FileState (fi, 1)
            self.files[fi] = fd
            self.log.trace ("offering %r", fn)
            self.dealer_stream.send_cmsg (cmsg)

    def dealer_on_recv (self, zmsg):
        """ Got reply from worker via "dealer" connection """
        self.log.warning ("reply via dealer: %s", zmsg)

    def router_on_recv (self, zmsg):
        """ Got reply from worker via "router" connection.

        First reply for a file binds that file to the worker.
        """
        cmsg = CCMessage (zmsg)
        data = cmsg.get_payload (self.xtx)
        fi = (data['d_hostname'], data['d_st_dev'], data['d_st_ino'], data['d_filename'])
        fd = self.files[fi]
        if fd.waddr is None:
            fd.waddr = zmsg[0]
            fd.wname = data['worker']
        else:
            assert fd.waddr == zmsg[0] and fd.wname == data['worker']
        fd.atime = time.time()
        fd.count -= 1
        assert fd.count >= 0

    def do_maint (self):
        """ Check & flush queues; drop inactive files. """
        self.log.trace ('cleanup')
        now = time.time()
        zombies = []
        for k, fd in self.files.iteritems():
            if fd.queue and fd.waddr:
                self.log.trace ("passing %r to %s", fd.ident, fd.wname)
                fd.send_to (self.router_stream)
            if (fd.count == 0) and (now - fd.atime > 2 * CLOSE_DELAY): # you'd better use msg for this
                self.log.debug ("forgetting %r", fd.ident)
                zombies.append(k)
        for k in zombies:
            self.files.pop(k)

    def stop (self):
        """ Signal workers to shut down. """
        super(TailWriter, self).stop()
        self.log.info ('stopping')
        self.timer_maint.stop()
        for w in self.workers:
            self.log.info ("signalling %s", w.name)
            w.stop()
class ProxyHandler (BaseProxyHandler):
    """ Simply proxies further.

    Optionally (config "ping") pings the remote CC and tracks
    round-trip statistics for the single proxied peer.
    """

    log = skytools.getLogger ('h:ProxyHandler')

    ping_tick = 1       # seconds between pings

    def __init__ (self, hname, hcf, ccscript):
        super(ProxyHandler, self).__init__(hname, hcf, ccscript)

        self.ping_remote = self.cf.getbool ("ping", False)
        if self.ping_remote:
            self.echo_stats = EchoState (self.remote_url)
            self.echo_timer = PeriodicCallback (self.ping, self.ping_tick * 1000, self.ioloop)
            self.echo_timer.start()
            self.log.debug ("will ping %s", self.remote_url)

    def on_recv (self, zmsg):
        """ Got message from remote CC, process it. """
        try:
            # pongs to our pings should come back w/o any routing info
            if self.ping_remote and zmsg[0] == '':
                self.log.trace ("%r", zmsg)
                cmsg = CCMessage (zmsg)
                req = cmsg.get_dest()
                if req == "echo.response":
                    self._recv_pong (cmsg)
                else:
                    self.log.warn ("unknown msg: %s", req)
        except:
            self.log.exception ("crashed")
        finally:
            # always let the base class forward the message
            super(ProxyHandler, self).on_recv(zmsg)

    def _recv_pong (self, cmsg):
        """ Pong received, evaluate it. """
        msg = cmsg.get_payload (self.xtx)
        if not msg: return
        if msg.orig_target != self.remote_url:
            self.log.warn ("unknown pong: %s", msg.orig_target)
            return
        echo = self.echo_stats
        echo.update_pong (msg)
        rtt = echo.time_pong - msg.orig_time
        if msg.orig_time == echo.time_ping:
            # pong for the latest ping -> normal round trip
            self.log.trace ("echo time: %f s (%s)", rtt, self.remote_url)
        elif rtt <= 5 * self.ping_tick:
            self.log.debug ("late pong: %f s (%s)", rtt, self.remote_url)
        else:
            self.log.info ("too late pong: %f s (%s)", rtt, self.remote_url)

    def _send_ping (self):
        """ Send ping to remote CC. """
        msg = EchoRequestMessage(
                target = self.remote_url)
        cmsg = self.xtx.create_cmsg (msg)
        self.stream.send_cmsg (cmsg)
        self.echo_stats.update_ping (msg)
        self.log.trace ("%r", msg)

    def ping (self):
        """ Echo requesting and monitoring. """
        self.log.trace ("")
        miss = self.echo_stats.time_ping - self.echo_stats.time_pong
        if miss > 5 * self.ping_tick:
            self.log.warn ("no pong from %s for %f s", self.remote_url, miss)
        self._send_ping ()

    def stop (self):
        """Stop echo timer (if pinging was enabled) and shut down."""
        super(ProxyHandler, self).stop()
        self.log.info ("stopping")
        # echo_timer exists only when ping_remote was set
        if hasattr (self, "echo_timer"):
            self.echo_timer.stop()
class MDPWorker(object):

    """Class for the MDP worker side.

    Thin encapsulation of a zmq.DEALER socket.
    Provides a send method with optional timeout parameter.

    Will use a timeout to indicate a broker failure.
    """

    _proto_version = b'MDPW01'

    # TODO: integrate that into API
    HB_INTERVAL = 1000  # in milliseconds
    HB_LIVENESS = 3     # HBs to miss before connection counts as dead

    def __init__(self, context, endpoint, service):
        """Initialize the MDPWorker.

        :param context: the zmq context to create the socket from.
        :param endpoint: the endpoint to connect to (byte-string/URL).
        :param service: byte-string with the service name.
        """
        self.context = context
        self.endpoint = endpoint
        self.service = service
        self.stream = None
        self._tmo = None
        self.need_handshake = True
        self.ticker = None
        self._delayed_cb = None
        # FIX: these attributes used to be created lazily in _send_ready(),
        # _on_message() and shutdown(); initialize them here so that every
        # attribute exists as soon as the object is constructed.
        self.envelope = None            # reply envelope of the request in progress
        self.curr_liveness = self.HB_LIVENESS
        self.timed_out = False
        self.connected = False
        self._create_stream()
        return

    def _create_stream(self):
        """Helper to create the socket and the stream.

        Also starts the heartbeat ticker and announces the worker
        to the broker with a READY message.
        """
        socket = self.context.socket(zmq.DEALER)
        ioloop = IOLoop.instance()
        self.stream = ZMQStream(socket, ioloop)
        self.stream.on_recv(self._on_message)
        self.stream.socket.setsockopt(zmq.LINGER, 0)
        self.stream.connect(self.endpoint)
        self.ticker = PeriodicCallback(self._tick, self.HB_INTERVAL)
        self._send_ready()
        self.ticker.start()
        return

    def _send_ready(self):
        """Helper method to prepare and send the workers READY message.

        Resets the liveness counter -- sending READY starts a fresh
        handshake with the broker.
        """
        ready_msg = [b'', self._proto_version, b'\x01', self.service]
        self.stream.send_multipart(ready_msg)
        self.curr_liveness = self.HB_LIVENESS
        return

    def _tick(self):
        """Method called every HB_INTERVAL milliseconds.

        Decrements liveness and sends a heartbeat; when liveness runs
        out the connection is torn down and recreated after 5 seconds.
        """
        self.curr_liveness -= 1
        ## print '%.3f tick - %d' % (time.time(), self.curr_liveness)
        self.send_hb()
        if self.curr_liveness >= 0:
            return
        ## print '%.3f lost connection' % time.time()
        # ouch, connection seems to be dead
        self.shutdown()
        # try to recreate it
        self._delayed_cb = DelayedCallback(self._create_stream, 5000)
        self._delayed_cb.start()
        return

    def send_hb(self):
        """Construct and send HB message to broker."""
        msg = [b'', self._proto_version, b'\x04']
        self.stream.send_multipart(msg)
        return

    def shutdown(self):
        """Method to deactivate the worker connection completely.

        Will delete the stream and the underlying socket.
        """
        if self.ticker:
            self.ticker.stop()
            self.ticker = None
        if not self.stream:
            return
        self.stream.socket.close()
        self.stream.close()
        self.stream = None
        self.timed_out = False
        self.need_handshake = True
        self.connected = False
        return

    def reply(self, msg):
        """Send the given message as reply to the pending request.

        :param msg: either a byte-string or a list of byte-strings.

        Uses (and clears) the envelope saved by _on_message() so the
        reply is routed back to the right client.
        """
        ##         if self.need_handshake:
        ##             raise ConnectionNotReadyError()
        # prepare full message
        to_send = self.envelope
        self.envelope = None
        if isinstance(msg, list):
            to_send.extend(msg)
        else:
            to_send.append(msg)
        self.stream.send_multipart(to_send)
        return

    def _on_message(self, msg):
        """Helper method called on message receive.

        :param msg: list with the message parts.
        """
        # 1st part is empty
        msg.pop(0)
        # 2nd part is protocol version
        # TODO: version check
        proto = msg.pop(0)
        # 3rd part is message type
        msg_type = msg.pop(0)
        # XXX: hardcoded message types!
        # any message resets the liveness counter
        self.need_handshake = False
        self.curr_liveness = self.HB_LIVENESS
        if msg_type == b'\x05': # disconnect
            self.curr_liveness = 0 # reconnect will be triggered by hb timer
        elif msg_type == b'\x02': # request
            # remaining parts are the user message
            envelope, msg = split_address(msg)
            envelope.append(b'')
            envelope = [b'', self._proto_version, b'\x03'] + envelope # REPLY
            self.envelope = envelope
            self.on_request(msg)
        else:
            # invalid message
            # ignored
            pass
        return

    def on_request(self, msg):
        """Public method called when a request arrived.

        Must be overloaded!
        """
        pass
class TailWriter(CCHandler):
    """ Simply appends to files (with help from workers).

    Accepts file-chunk messages from clients, assigns each distinct file
    to one worker thread, and forwards subsequent chunks to that worker.
    """

    CC_ROLES = ['remote']

    log = skytools.getLogger('h:TailWriter')

    def __init__(self, hname, hcf, ccscript):
        super(TailWriter, self).__init__(hname, hcf, ccscript)

        self.files = {}         # file-identity tuple -> FileState
        self.workers = []
        self.wparams = {}  # passed to workers

        self.wparams['dstdir'] = self.cf.getfile('dstdir')
        self.wparams['host_subdirs'] = self.cf.getbool('host-subdirs', 0)
        self.wparams['maint_period'] = self.cf.getint('maint-period', 3)
        self.wparams['write_compressed'] = self.cf.get('write-compressed', '')
        assert self.wparams['write_compressed'] in [
            None, '', 'no', 'keep', 'yes'
        ]
        if self.wparams['write_compressed'] in ('keep', 'yes'):
            self.log.info(
                "position checking not supported for compressed files")
        if self.wparams['write_compressed'] == 'yes':
            self.wparams['compression'] = self.cf.get('compression', '')
            if self.wparams['compression'] not in ('gzip', 'bzip2'):
                self.log.error("unsupported compression: %s",
                               self.wparams['compression'])
            self.wparams['compression_level'] = self.cf.getint(
                'compression-level', '')
        self.wparams['buf_maxbytes'] = cc.util.hsize_to_bytes(
            self.cf.get('buffer-bytes', '1 MB'))
        if self.wparams['buf_maxbytes'] < BUF_MINBYTES:
            self.log.info("buffer-bytes too low, adjusting: %i -> %i",
                          self.wparams['buf_maxbytes'], BUF_MINBYTES)
            self.wparams['buf_maxbytes'] = BUF_MINBYTES

        # initialise sockets for communication with workers
        self.dealer_stream, self.dealer_url = self.init_comm(
            zmq.XREQ, 'inproc://workers-dealer', self.dealer_on_recv)
        self.router_stream, self.router_url = self.init_comm(
            zmq.XREP, 'inproc://workers-router', self.router_on_recv)

        self.launch_workers()

        self.timer_maint = PeriodicCallback(
            self.do_maint, self.wparams['maint_period'] * 1000, self.ioloop)
        self.timer_maint.start()

    def init_comm(self, stype, url, cb):
        """ Create socket, stream, etc for communication with workers.

        Returns (stream, url-with-port) tuple.
        """
        sock = self.zctx.socket(stype)
        port = sock.bind_to_random_port(url)
        curl = "%s:%d" % (url, port)
        stream = CCStream(sock, self.ioloop)
        stream.on_recv(cb)
        return (stream, curl)

    def launch_workers(self):
        """ Create and start worker threads. """
        nw = self.cf.getint('worker-threads', 10)
        for i in range(nw):
            wname = "%s.worker-%i" % (self.hname, i)
            self.log.info("starting %s", wname)
            w = TailWriter_Worker(wname, self.xtx, self.zctx, self.ioloop,
                                  self.dealer_url, self.router_url,
                                  self.wparams)
            w.stat_inc = self.stat_inc  # XXX
            self.workers.append(w)
            w.start()

    def handle_msg(self, cmsg):
        """ Got message from client, process it. """

        data = cmsg.get_payload(self.xtx)
        if not data: return

        host = data['hostname']
        fn = data['filename']
        st_dev = data.get('st_dev')
        st_ino = data.get('st_ino')
        # identity includes dev/inode so a rotated file counts as new
        fi = (host, st_dev, st_ino, fn)

        if fi in self.files:
            fd = self.files[fi]
            if fd.waddr:  # already accepted ?
                self.log.trace("passing %r to %s", fn, fd.wname)
                fd.queue.append(cmsg)
                fd.send_to(self.router_stream)
            else:
                # worker not yet known -- queue until router_on_recv
                self.log.trace("queueing %r", fn)
                fd.queue.append(cmsg)
        else:
            # new file -- offer it to any idle worker via dealer socket
            fd = FileState(fi, 1)
            self.files[fi] = fd
            self.log.trace("offering %r", fn)
            self.dealer_stream.send_cmsg(cmsg)

    def dealer_on_recv(self, zmsg):
        """ Got reply from worker via "dealer" connection """
        self.log.warning("reply via dealer: %s", zmsg)

    def router_on_recv(self, zmsg):
        """ Got reply from worker via "router" connection.

        First reply for a file binds that file to the worker.
        """
        cmsg = CCMessage(zmsg)
        data = cmsg.get_payload(self.xtx)
        fi = (data['d_hostname'], data['d_st_dev'], data['d_st_ino'],
              data['d_filename'])
        fd = self.files[fi]
        if fd.waddr is None:
            fd.waddr = zmsg[0]
            fd.wname = data['worker']
        else:
            assert fd.waddr == zmsg[0] and fd.wname == data['worker']
        fd.atime = time.time()
        fd.count -= 1
        assert fd.count >= 0

    def do_maint(self):
        """ Check & flush queues; drop inactive files. """
        self.log.trace('cleanup')
        now = time.time()
        zombies = []
        for k, fd in self.files.iteritems():
            if fd.queue and fd.waddr:
                self.log.trace("passing %r to %s", fd.ident, fd.wname)
                fd.send_to(self.router_stream)
            if (fd.count == 0) and (now - fd.atime > 2 * CLOSE_DELAY
                                    ):  # you'd better use msg for this
                self.log.debug("forgetting %r", fd.ident)
                zombies.append(k)
        for k in zombies:
            self.files.pop(k)

    def stop(self):
        """ Signal workers to shut down. """
        super(TailWriter, self).stop()
        self.log.info('stopping')
        self.timer_maint.stop()
        for w in self.workers:
            self.log.info("signalling %s", w.name)
            w.stop()
class DeviceConnection(object):
    """REQ connection from a farm device to the FarmMonitor service.

    Handles heartbeating, periodic updates, backups and software
    updates over a pickle-based MDP request/reply protocol.
    """

    SERVICE_NAME = 'device'  # service to connect to
    TIMEOUT = 5  # time to wait for answer in seconds
    # Number of connections to try before restarting when in incorrect state
    CONNECTION_ATTEMPS = 3
    HB_INTERVAL = 1000 * 10  # in milliseconds

    def __init__(self, context, device_id, address):
        # context: zmq context used to create the REQ socket
        # device_id: identifier sent in every protocol message
        # address: broker address to connect to
        self.context = context
        self.device_id = device_id
        self.address = address
        self.ticker = None
        self.updater = None
        self.socket = None
        self.can_send = False
        self._restart()
        self.heartbeat()
        return

    def _restart(self):
        """(Re)create the socket, heartbeat ticker and update callback."""
        self.shutdown()
        self.socket = self.context.socket(zmq.REQ)
        self.socket.setsockopt(zmq.LINGER, 0)
        self.socket.connect(self.address)
        self.can_send = True
        self.connection_attempts = 0
        self.device_state = 'unknown'

        self.ticker = PeriodicCallback(self.heartbeat,
                                       DeviceConnection.HB_INTERVAL)
        self.ticker.start()

        # delay = seconds_till_next('hour', duration=1) + 2  # 2 second buffer
        delay = 60  # send update one minute after startup
        self.updater = DelayedCallback(self.update, delay * 1000)
        self.updater.start()
        return

    def shutdown(self):
        """Stop callbacks and close the socket; safe to call repeatedly."""
        if self.ticker:
            self.ticker.stop()
            self.ticker = None
        if self.updater:
            self.updater.stop()
        if self.socket:
            self.socket.close()
            self.socket = None
        self.can_send = False
        return

    def send(self, message):
        """Send *message* (list of pickled frames) via mdp_request.

        Returns the reply (or None on timeout / bad state).  REQ sockets
        must strictly alternate send/recv, hence the can_send flag.
        """
        if not self.can_send:
            self.connection_attempts += 1
            logger.error("DeviceConnection is not in state to send")
            return None
        else:
            self.connection_attempts = 0

        self.can_send = False
        logger.debug("DeviceConnection sending {0}".format(message))
        reply = mdp_request(self.socket, DeviceConnection.SERVICE_NAME,
                            message, DeviceConnection.TIMEOUT)
        if reply:
            logger.debug("DeviceConnection reply received: {0}".format(reply))
            self.can_send = True
            return reply
        else:
            # Timeout! Will be restarted at next heartbeat
            logger.warn("DeviceConnection timeout. Will be restarted at next heartbeat")
            self.connection_attempts = DeviceConnection.CONNECTION_ATTEMPS
            return None

    def heartbeat(self):
        """Send a heartbeat; restart the connection after repeated failures.

        Updates device_state from the server's response.
        """
        if self.connection_attempts >= DeviceConnection.CONNECTION_ATTEMPS:
            logger.warn("DeviceConnection attempts max reached. Restarting connection.")
            self._restart()

        message = HeartbeatMessage(self.device_id)
        reply = self.send([pickle.dumps(message)])

        if reply:
            message = pickle.loads(reply[1])
            if message.response == self.device_state:
                # device state has not changed
                pass
            elif message.response == 'unknown':
                logger.info("connection status set to 'unknown'")
                self.device_state = 'unknown'
            elif message.response == 'joined':
                logger.info("connection status set to 'joined'")
                self.device_state = 'joined'
            elif message.response == 'connected':
                logger.info("connection status set to 'connected'")
                self.device_state = 'connected'
            else:
                # unexpected response -- reset the connection
                self._restart()
        return

    def create(self):
        """Send this device's details to FarmMonitor."""
        logger.info("Sending create details for device")
        device_details = DeviceInfo(self.device_id, 'farm_monitor')
        device_details.create()
        message = [pickle.dumps(device_details)]
        reply = self.send(message)
        if reply:
            message = pickle.loads(reply[1])
            logger.debug("Update response from {0} :{1}".format(message.source,
                                                                message.reply))
        return

    def update(self):
        """Send a periodic device update, then reschedule itself."""
        self.updater.stop()
        if self.device_state == 'connected':
            logger.info("Sending update for device")
            update = DeviceUpdate(self.device_id, 'farm_monitor')
            update.create()
            reply = self.send([pickle.dumps(update)])
            if reply:
                message = pickle.loads(reply[1])
                logger.debug("Update response from {0} :{1}".format(message.source,
                                                                    message.reply))

        # delay = seconds_till_next('hour', duration=1) + 2  # 2 second buffer
        delay = 60 * 60  # send next update in 60 minutes
        self.updater = DelayedCallback(self.update, delay * 1000)
        self.updater.start()
        return

    def backup(self):
        """Create a backup and, when connected, send it to FarmMonitor.

        The file itself is only transferred for standalone configurations;
        the database object is sent in both cases if the file transfer
        (when needed) succeeded.
        """
        backup_index = create_backup()

        session = Session()
        backup = session.query(Backup).filter_by(index=backup_index).first()
        standalone_configuration = session.query(SystemSetup.standalone_configuration).scalar()

        if self.device_state == 'connected':
            failed_attempts = 0
            # don't send the file if we are a combined configuration
            if standalone_configuration:
                logger.info("Sending device backup file to FarmMonitor")
                file_size = os.path.getsize(backup.filepath)
                bytes_sent = 0
                # NOTE(review): opened in text mode ('r') although the backup
                # is presumably a binary zip -- works on POSIX py2 only; confirm
                zip_file = open(backup.filepath, 'r')

                while bytes_sent < file_size and failed_attempts < 3:
                    zip_file.seek(bytes_sent)
                    data = zip_file.read(512)
                    logger.debug("Sending {0} bytes of backup file".format(len(data)))
                    message = FileMessage(self.device_id, 'farm_monitor', 'backup')
                    message.set_file(backup.filepath, data)
                    reply = self.send([pickle.dumps(message)])
                    if reply:
                        # only mark the data received if a reply is recieved
                        bytes_sent += len(data)
                        failed_attempts = 0
                        reply_message = pickle.loads(reply[1])
                        logger.debug("Update reply: {0}".format(reply_message.reply))
                    else:
                        logger.warn("Failed attempt in sending backup file")
                        failed_attempts += 1
                zip_file.close()

            # only send the database object if the file was sent correctly
            if failed_attempts == 0:
                logger.info("Sending device backup database object to FarmMonitor")
                message = FileMessage(self.device_id, 'farm_monitor', 'backup')
                message.set_db_object(pickle.dumps(backup))
                reply = self.send([pickle.dumps(message)])
                if reply:
                    reply_message = pickle.loads(reply[1])
                    logger.debug("Update response: {0}".format(reply_message.reply))
            else:
                logger.warn("To many failed attempts. Backup failed to send to Farm Monitor")

        session.close()
        return

    def perform_update(self, update_id):
        """Fetch an update (object and, if standalone, its file) and run it."""
        session = Session()
        standalone_configuration = session.query(SystemSetup.standalone_configuration).scalar()

        if self.device_state == 'connected':
            # request the update db_object
            logger.info("Requesting update with id: {0} from FarmMonitor".format(update_id))
            # NOTE(review): next assignment is dead code -- immediately
            # overwritten by the FileMessage below
            message = [self.device_id, 'perform_update', 'db_object', update_id]
            message = FileMessage(self.device_id, 'farm_monitor', 'perform_update')
            message.set_db_object(update_id)
            reply = self.send([pickle.dumps(message)])
            if reply:
                reply_message = pickle.loads(reply[1])
                logger.debug("Update response from farm_monitor")
                if reply_message.data:
                    update = pickle.loads(reply_message.data)
                else:
                    logger.warn("incorrect response in perform_update: {0}".format(reply[0]))
                    session.close()
                    return

                failed_attempts = 0
                # don't request the file if we are a combined configuration
                if standalone_configuration:
                    if not os.path.exists(update.filepath):
                        os.makedirs(update.filepath)
                    bytes_received = 0
                    more_to_send = True
                    # NOTE(review): text-mode append for what is presumably a
                    # binary update file -- confirm; also assumes no stale
                    # partial file exists at this path
                    new_file = open(update.filepath + update.filename, 'a')

                    while more_to_send and failed_attempts < 3:
                        logger.info("Requesting update file from FarmMonitor")
                        message = FileMessage(self.device_id, 'farm_monitor', 'perform_update')
                        message.request_file(update_id, str(bytes_received))
                        reply = self.send([pickle.dumps(message)])
                        if reply:
                            reply_message = pickle.loads(reply[1])
                            logger.debug("Update response from FarmMonitor")
                            failed_attempts = 0
                            # in this case, reply_message.filepath is really a boolean.
                            # reusing the FileMessage object
                            more_to_send = reply_message.filepath
                            data = reply_message.data
                            new_file.seek(bytes_received)
                            new_file.write(data)
                            bytes_received += len(data)
                        else:
                            logger.warn("Failed attempt in requesting update file")
                            failed_attempts += 1
                    new_file.close()

                    # compare the hash of the file
                    sha256 = hashlib.sha256()
                    with open(update.filepath + update.filename, 'rb') as f:
                        for block in iter(lambda: f.read(2048), b''):
                            sha256.update(block)
                    if update.sha256 == sha256.hexdigest():
                        logger.info("Update file successfully received")
                        print('update received good')
                        session.merge(update)
                        session.commit()
                    else:
                        logger.warn("Update file hash does not match. Update failed")
                        session.close()
                        return

                # now perform the actual update
                run_update(update.id, 'farm_device')
            else:
                logger.warn("No reply when requesting update db_object from FarMonitor.")
        session.close()
        return
class MDPWorker(ZMQStream):
    """MDP worker implemented directly as a DEALER ZMQStream.

    Connects to the broker, announces itself with READY, heartbeats
    continuously and dispatches requests to on_message().  Subclasses
    send answers back with send_reply().
    """

    def __init__(self, broker, service, io_loop=None):
        """Create and setup an MDP worker.

        @param broker A string containing the broker's URL
        @param service A string containing the service name
        @param io_loop An existing I/O loop object. If None, the default
               will be used.
        """
        self.service = service
        self._broker = broker
        self.ctx = zmq.Context()
        sock = self.ctx.socket(zmq.DEALER)
        ZMQStream.__init__(self, sock, io_loop)
        # last watchdog timer tick
        self.watchdog = 0
        # connection callback one-shot
        self._conncb = DelayedCallback(self.send_ready, 3000, self.io_loop)
        # heartbeat callback..runs continuous when connected
        self._hbcb = PeriodicCallback(self.send_heartbeat, 2000, self.io_loop)
        # number of connection attempts
        self._conn_attempt = 0
        # waiting to connect state
        self._waiting_to_connect = True
        # have we been disconnected? (flags a reconnect attempt)
        self.disconnected = False
        # connect the socket and send a READY when the io_loop starts
        self.connect(self._broker)
        self._conncb.start()

    def reset_watchdog(self):
        """Private method used to reset the HEARTBEAT watchdog."""
        self.watchdog = time.time()

    def disconnect(self):
        """Disconnect from the broker."""
        logging.info("Disconnected from broker")
        self.on_recv(None)   # stop message processing
        self._conncb.stop()  # stop any pending reconnect
        self._hbcb.stop()    # stop heartbeats
        self.disconnected = True
        # stop the I/O loop. If it's used by something else, the caller
        # can restart it
        self.io_loop.stop()

    def reconnect(self):
        """Try to reconnect to the broker."""
        if self.disconnected:
            # don't try and reconnect, we got an explicit disconnect
            return
        logging.info("Attempting to reconnect to broker")
        self._hbcb.stop()
        self._conn_attempt = 0
        self._waiting_to_connect = True
        try:
            self.connect(self._broker)
        except ZMQError:
            # FIX: logging.exception() requires a message argument;
            # calling it with no args raised TypeError and masked the error
            logging.exception("Error connecting to broker")
            self.io_loop.stop()
            return
        self._conncb.start()

    def send_ready(self):
        """Send a READY message.

        Gives up after 10 attempts.  There is no reply to READY, so on
        send we assume we are connected unless a DISCONNECT arrives.
        """
        if not self._waiting_to_connect:
            # connected already
            return
        if self.disconnected:
            # don't try and connect, we got an explicit disconnect
            return
        logging.debug("Sending READY")
        if self._conn_attempt >= 10:
            logging.error("10 connection attempts have failed. Giving up.")
            return
        self._conn_attempt += 1
        logging.debug("Connection attempt %i" % self._conn_attempt)
        rdy = [b'', MDPW_VER, b'\x01', self.service]
        self.on_recv(self.on_message)
        self.send_multipart(rdy)
        # There is no reply to READY so
        # we must assume we are connected unless we see a DISCONNECT
        self._waiting_to_connect = False
        # NOTE(review): `_disconed` looks like a typo for a disconnect flag;
        # it is never read in this class -- confirm before removing
        self._disconed = False
        self.reset_watchdog()
        self._hbcb.start()

    def send_reply(self, client, msg, partial=False):
        """Send a reply to a client. This is typically called from
        on_request()

        @param client The client identifier as passed to on_request()
        @param msg The message to send to the client. If this is a list,
               it's appended to the multipart; otherwise it is converted
               to a string and sent as a single frame.
        @param partial If this is True, the message is sent as a PARTIAL
               and at least one more call must be made to send_reply().
               Otherwise a FINAL is sent and not more calls should be
               made to send_reply() until another request is processed.
        """
        self._hbcb.stop()  # stop while sending other messages
        if partial:
            rply = [b'', MDPW_VER, b'\x03', client, b'']
        else:
            rply = [b'', MDPW_VER, b'\x04', client, b'']
        if isinstance(msg, list):
            rply.extend(msg)
        else:
            rply.append(msg)
        try:
            logging.debug("Sending reply: %s" % msg_str(rply))
            self.send_multipart(rply)
        except BaseException as e:
            # FIX: the format string had no %s placeholder, so the old
            # '"..." % e' raised TypeError instead of logging the error
            logging.error("Error sending reply: %s", e)
        self._hbcb.start()  # restart heartbeats
class ZmqMaster(object, LoggingMixin):
    """
    This is the ZMQ Master implementation.

    The master will send :class:`DataMessage` object to the workers and
    receive the processed messages. Unknown links will then be added to the
    frontier.
    """

    def __init__(self, settings, identity, insocket, outsocket, mgmt,
            frontier, log_handler, log_level, io_loop):
        """
        Initialize the master.
        """
        LoggingMixin.__init__(self, log_handler, log_level)
        self._identity = identity
        self._io_loop = io_loop or IOLoop.instance()

        self._in_stream = ZMQStream(insocket, io_loop)
        self._out_stream = ZMQStream(outsocket, io_loop)

        self._mgmt = mgmt
        self._frontier = frontier

        self._running = False
        self._available_workers = []

        # periodically check if there are pending URIs to crawl
        self._periodic_update = PeriodicCallback(self._send_next_uri,
                settings.MASTER_PERIODIC_UPDATE_INTERVAL, io_loop=io_loop)
        # start this periodic callback when you are waiting for the workers to
        # finish
        self._periodic_shutdown = PeriodicCallback(self._shutdown_wait, 500,
                io_loop=io_loop)
        self._shutdown_counter = 0
        self._logger.debug("zmqmaster::initialized")

    def start(self):
        """
        Start the master.
        """
        self._mgmt.add_callback(ZMQ_SPYDER_MGMT_WORKER, self._worker_msg)
        self._in_stream.on_recv(self._receive_processed_uri)
        self._periodic_update.start()
        self._running = True
        self._logger.debug("zmqmaster::starting...")

    def stop(self):
        """
        Stop the master gracefully, i.e. stop sending more URIs that should
        get processed.
        """
        self._logger.debug("zmqmaster::stopping...")
        self._running = False
        self._periodic_update.stop()

    def shutdown(self):
        """
        Shutdown the master and notify the workers.
        """
        self._logger.debug("zmqmaster::shutdown...")
        self.stop()
        self._mgmt.publish(topic=ZMQ_SPYDER_MGMT_WORKER,
                identity=self._identity, data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
        self._frontier.close()
        self._periodic_shutdown.start()

    def _shutdown_wait(self):
        """
        Callback called from `self._periodic_shutdown` in order to wait for
        the workers to finish.

        Gives up after 5 ticks even if workers are still registered.
        """
        self._shutdown_counter += 1
        if 0 == len(self._available_workers) or self._shutdown_counter > 5:
            self._periodic_shutdown.stop()
            self._logger.debug("zmqmaster::bye bye...")
            self._io_loop.stop()

    def close(self):
        """
        Close all open sockets.
        """
        self._in_stream.close()
        self._out_stream.close()

    def finished(self):
        """
        Return true if all uris have been processed and the master is ready
        to be shut down.
        """
        return not self._running

    def _worker_msg(self, msg):
        """
        Called when a worker has sent a :class:`MgmtMessage`.

        Tracks worker availability (AVAIL / QUIT_ACK notifications).
        """
        if ZMQ_SPYDER_MGMT_WORKER_AVAIL == msg.data:
            self._available_workers.append(msg.identity)
            self._logger.info("zmqmaster::A new worker is available (%s)" %
                    msg.identity)
            self._send_next_uri()

        if ZMQ_SPYDER_MGMT_WORKER_QUIT_ACK == msg.data:
            if msg.identity in self._available_workers:
                self._available_workers.remove(msg.identity)
                self._logger.info("zmqmaster::Removing worker (%s)" %
                        msg.identity)

    def _send_next_uri(self):
        """
        See if there are more uris to process and send them to the workers
        if there are any.

        At this point there is a very small heuristic in order to maximize
        the throughput: try to keep the `self._out_stream._send_queue`
        full.
        """
        if not self._running:
            self._logger.error("Master is not running, not sending more uris")
            return

        num_workers = len(self._available_workers)

        if self._running and num_workers > 0:
            # NOTE(review): peeks at ZMQStream's private _send_queue --
            # depends on pyzmq internals
            while self._out_stream._send_queue.qsize() < num_workers * 4:

                try:
                    next_curi = self._frontier.get_next()
                except Empty:
                    # well, frontier has nothing to process right now
                    self._logger.debug("zmqmaster::Nothing to crawl right now")
                    break

                self._logger.info("zmqmaster::Begin crawling next URL (%s)" %
                        next_curi.url)
                msg = DataMessage(identity=self._identity, curi=next_curi)
                self._out_stream.send_multipart(msg.serialize())

    def _receive_processed_uri(self, raw_msg):
        """
        Receive and reschedule an URI that has been processed. Additionally
        add all extracted URLs to the frontier.
        """
        msg = DataMessage(raw_msg)
        self._logger.info("zmqmaster::Crawling URL (%s) finished" %
                msg.curi.url)

        try:
            # dispatch on the HTTP status-code class of the crawled URI
            if 200 <= msg.curi.status_code < 300:
                # we have some kind of success code! yay
                self._frontier.process_successful_crawl(msg.curi)
            elif 300 <= msg.curi.status_code < 400:
                # Some kind of redirect code. This will only happen if the
                # number of redirects exceeds settings.MAX_REDIRECTS
                self._frontier.process_redirect(msg.curi)
            elif 400 <= msg.curi.status_code < 500:
                # some kind of error where the resource could not be found.
                self._frontier.process_not_found(msg.curi)
            elif 500 <= msg.curi.status_code < 600:
                # some kind of server error
                self._frontier.process_server_error(msg.curi)
        except:
            self._logger.critical("zmqmaster::Uncaught exception in the sink")
            self._logger.critical("zmqmaster::%s" % (traceback.format_exc(),))
            msg.curi.status_code = CURI_EUNCAUGHT_EXCEPTION
            self._frontier.process_server_error(msg.curi)

        self._send_next_uri()
class Master(object):
    """Crawl master: distributes requests to workers and collects results.

    Receives processed data from workers on a SUB socket, pushes new
    requests to workers on a PUSH socket, and exchanges control messages
    through a ServerMessenger.

    :param frontier: request source/sink; must provide ``get_next_request()``
        and ``reload_request()`` -- project type, not visible here.
    :param data_in_sock: endpoint workers publish processed data to (SUB).
    :param data_out_sock: endpoint requests are pushed out on (PUSH).
    :param msg_in_sock: inbound control-message endpoint (ServerMessenger).
    :param msg_out_sock: outbound control-message endpoint (ServerMessenger).
    :param io_loop: optional IOLoop; defaults to ``IOLoop.instance()``.
    """

    def __init__(self, frontier,
                 data_in_sock='ipc:///tmp/robot-data-w2m.sock',
                 data_out_sock='ipc:///tmp/robot-data-m2w.sock',
                 msg_in_sock='ipc:///tmp/robot-msg-w2m.sock',
                 msg_out_sock='ipc:///tmp/robot-msg-m2w.sock',
                 io_loop=None):
        # Identity tag, unique per host/process, embedded in outgoing messages.
        self.identity = 'master:%s:%s' % (socket.gethostname(), os.getpid())
        context = zmq.Context()
        self._io_loop = io_loop or IOLoop.instance()
        # SUB socket: receive processed data from workers (subscribe to all).
        self._in_socket = context.socket(zmq.SUB)
        self._in_socket.setsockopt(zmq.SUBSCRIBE, '')
        self._in_socket.bind(data_in_sock)
        self._in_stream = ZMQStream(self._in_socket, io_loop)
        # PUSH socket: fan requests out to workers.
        self._out_socket = context.socket(zmq.PUSH)
        self._out_socket.bind(data_out_sock)
        self._out_stream = ZMQStream(self._out_socket, io_loop)
        self._online_workers = set()
        self._running = False
        # Periodic pumps: try to send work every 100 ms, reload every 1 s.
        self._updater = PeriodicCallback(self._send_next, 100, io_loop=io_loop)
        self._reloader = PeriodicCallback(self.reload, 1000, io_loop=io_loop)
        self.frontier = frontier
        self.messenger = ServerMessenger(msg_in_sock, msg_out_sock,
                                         context, io_loop)

    def start(self):
        """Register callbacks, start timers, and mark the master running."""
        logging.info('[%s] starting', self.identity)
        self.messenger.add_callback(CTRL_MSG_WORKER, self._on_worker_msg)
        self.messenger.start()
        self._in_stream.on_recv(self._on_receive_processed)
        self._updater.start()
        self._reloader.start()
        self._running = True

    def stop(self):
        """Stop timers and the messenger; sockets stay open (see close())."""
        self._running = False
        self._reloader.stop()
        self._updater.stop()
        self.messenger.stop()
        # self.messenger.publish(CTRL_MSG_WORKER, self.identity,
        #                        CTRL_MSG_WORKER_QUIT)

    def close(self):
        """Close all streams and sockets."""
        self._in_stream.close()
        self._in_socket.close()
        self._out_stream.close()
        self._out_socket.close()
        self.messenger.close()

    def reload(self):
        # Periodic hook (1 s); intentionally a no-op in this implementation.
        pass

    def _on_worker_msg(self, msg):
        """Control-message callback: track workers coming online."""
        if msg.data == CTRL_MSG_WORKER_ONLINE:
            self._online_workers.add(msg.identity)
            logging.info('[%s] append [%s]', self.identity, msg.identity)
            self._send_next()
        # if msg.data == CTRL_MSG_WORKER_QUIT_ACK:
        #     if msg.identity in self._online_workers:
        #         self._online_workers.remove(msg.identity)

    def _send_next(self):
        """Top up the outgoing queue with requests from the frontier.

        Heuristic: keep ~4 pending requests per online worker queued.
        NOTE(review): peeks at ZMQStream's private ``_send_queue`` -- relies
        on pyzmq internals.
        """
        if not self._running:
            return
        worker_num = len(self._online_workers)
        if self._running and worker_num > 0:
            while self._out_stream._send_queue.qsize() < worker_num * 4:
                request = self.frontier.get_next_request()
                if not request:
                    break
                msg = RequestMessage(self.identity, request)
                self._out_stream.send_multipart(msg.serialize())
                logging.debug('[%s] send request(%s)',
                              self.identity, request.url)
                self.frontier.reload_request(request)

    def _on_receive_processed(self, zmq_msg):
        """SUB-stream callback: a worker finished a request; send more work."""
        msg = ResponseMessage.deserialize(zmq_msg)
        request = msg.response.request
        logging.debug('[%s] receive response(%s)', self.identity, request.url)
        self._send_next()
class MDPWorker(object):
    """Class for the MDP worker side.

    Thin encapsulation of a zmq.DEALER socket.
    Provides a send method with optional timeout parameter.

    Will use a timeout to indicate a broker failure.
    """

    _proto_version = W_WORKER

    # TODO: integrate that into API
    HB_INTERVAL = 2500   # in milliseconds
    HB_LIVENESS = 5      # HBs to miss before connection counts as dead
    RECONNECT_PERIOD = 5000  # ms to wait before trying to reconnect

    def __init__(self, context, endpoint, hb_endpoint, service,
                 endpoint_callback=None):
        """Initialize the MDPWorker.

        context is the zmq context to create the socket from.
        service is a byte-string with the service name.
        endpoint_callback, if given, is called on reconnect to re-resolve
        (endpoint, hb_endpoint) -- e.g. when the broker's address changed.
        """
        self.context = context
        self.endpoint = endpoint
        self.hb_endpoint = hb_endpoint
        self.service = service
        self.endpoint_callback = endpoint_callback
        self.stream = None
        self.hb_stream = None
        self.ticker = None
        self._delayed_reconnect = None
        # Unique id assigned by the broker via a W_READY reply.
        self._unique_id = ''
        self._create_stream()

    def _create_stream(self):
        """Helper to create the sockets and the streams.

        Creates one DEALER stream for requests and one for heartbeats,
        sends READY, and starts the heartbeat ticker.
        """
        logging.debug('Worker creating stream')
        ioloop = IOLoop.instance()
        socket = self.context.socket(zmq.DEALER)
        self.stream = ZMQStream(socket, ioloop)
        self.stream.on_recv(self._on_message)
        self.stream.socket.setsockopt(zmq.LINGER, 0)
        self.stream.connect(self.endpoint)
        socket = self.context.socket(zmq.DEALER)
        self.hb_stream = ZMQStream(socket, ioloop)
        self.hb_stream.on_recv(self._on_message)
        self.hb_stream.socket.setsockopt(zmq.LINGER, 0)
        self.hb_stream.connect(self.hb_endpoint)
        self.ticker = PeriodicCallback(self._tick, self.HB_INTERVAL)
        self._send_ready()
        self.ticker.start()

    def _send_ready(self):
        """Helper method to prepare and send the workers READY message."""
        ready_msg = [EMPTY_FRAME, self._proto_version, W_READY, self.service]
        self.stream.send_multipart(ready_msg)
        self.curr_liveness = self.HB_LIVENESS

    def _tick(self):
        """Method called every HB_INTERVAL milliseconds.

        Decrements liveness; once it drops below zero, tears the
        connection down and schedules a delayed reconnect.
        """
        self.curr_liveness -= 1
        logging.debug('Worker HB tick, current liveness: %d'
                      % self.curr_liveness)
        self.send_hb()
        if self.curr_liveness >= 0:
            return
        #
        # Ouch, connection seems to be dead
        #
        logging.debug('Worker lost connection')
        self.shutdown()
        #
        # try to recreate the connection
        #
        self._delayed_reconnect = DelayedCallback(self._recreate_stream,
                                                  self.RECONNECT_PERIOD)
        self._delayed_reconnect.start()

    def _recreate_stream(self):
        """Re-resolve endpoints (if a callback is set) and rebuild streams."""
        logging.debug('Worker trying to recreate stream')
        if self.endpoint_callback is not None:
            #
            # Check, maybe the ip of the proxy changed.
            #
            try:
                self.endpoint, self.hb_endpoint = self.endpoint_callback()
            except:
                #
                # Probably some problem in accessing the server.
                # Retry later instead of failing now.
                #
                self._delayed_reconnect = DelayedCallback(
                    self._recreate_stream, self.RECONNECT_PERIOD)
                self._delayed_reconnect.start()
                return
        self._create_stream()

    def send_hb(self):
        """Construct and send HB message to broker."""
        msg = [EMPTY_FRAME, self._proto_version, W_HEARTBEAT, self._unique_id]
        self.hb_stream.send_multipart(msg)

    def shutdown(self):
        """Method to deactivate the worker connection completely.

        Will delete the streams and the underlying sockets.
        """
        logging.debug('Shutdown of the worker')
        if self.ticker:
            logging.debug('Stopping the HB ticker')
            self.ticker.stop()
            self.ticker = None
        if not self.stream:
            return
        logging.debug('Closing the stream')
        self.stream.socket.close()
        self.stream.close()
        self.stream = None
        self.hb_stream.socket.close()
        self.hb_stream.close()
        self.hb_stream = None
        self.timed_out = False
        self.connected = False

    def reply(self, msg):
        """Send the given message.

        msg can either be a byte-string or a list of byte-strings.
        Uses (and consumes) the envelope saved by the last W_REQUEST.
        """
        #
        # prepare full message
        #
        to_send = self.envelope
        self.envelope = None
        if isinstance(msg, list):
            to_send.extend(msg)
        else:
            to_send.append(msg)
        self.stream.send_multipart(to_send)

    def _on_message(self, msg):
        """Helper method called on message receive.

        msg is a list w/ the message parts
        """
        logging.debug('Received message: {}'.format(msg))
        #
        # 1st part is empty
        #
        msg.pop(0)
        #
        # 2nd part is protocol version
        # TODO: version check
        #
        proto = msg.pop(0)
        #
        # 3rd part is message type
        #
        msg_type = msg.pop(0)
        #
        # XXX: hardcoded message types!
        # any message resets the liveness counter
        #
        self.curr_liveness = self.HB_LIVENESS
        if msg_type == W_DISCONNECT:
            #
            # Disconnect. Reconnection will be triggered by hb timer
            #
            self.curr_liveness = 0
        elif msg_type == W_READY:
            #
            # The message contains the unique id attached to the worker.
            #
            if len(msg) > 0:
                #
                # This above check is used for supporting older version of
                # the code.
                #
                self._unique_id = msg[0]
        elif msg_type == W_REQUEST:
            #
            # Request. Remaining parts are the user message
            #
            envelope, msg = split_address(msg)
            envelope.append(EMPTY_FRAME)
            envelope = [EMPTY_FRAME, self._proto_version, W_REPLY] + envelope
            self.envelope = envelope
            self.on_request(msg)
        else:
            #
            # invalid message
            # ignored
            #
            pass

    def on_request(self, msg):
        """Public method called when a request arrived.

        Must be overloaded!
        """
        raise NotImplementedError(
            'on_request must be implemented by the subclass.')
class MDPWorker(object):
    """Class for the MDP worker side.

    Thin encapsulation of a zmq.XREQ socket.
    Provides a send method with optional timeout parameter.

    Will use a timeout to indicate a broker failure.

    NOTE: this variant is Python 2 only (uses ``print`` statements) and
    hardcodes MDP/Worker 0.1 command bytes ('\\x01'..'\\x05').
    """

    _proto_version = b'MDPW01'

    # TODO: integrate that into API
    HB_INTERVAL = 1000  # in milliseconds
    HB_LIVENESS = 3     # HBs to miss before connection counts as dead

    def __init__(self, context, endpoint, service):
        """Initialize the MDPWorker.

        context is the zmq context to create the socket from.
        service is a byte-string with the service name.
        """
        self.context = context
        self.endpoint = endpoint
        self.service = service
        self.stream = None
        self._tmo = None
        # True until the first message from the broker arrives.
        self.need_handshake = True
        self.ticker = None
        self._delayed_cb = None
        self._create_stream()
        return

    def _create_stream(self):
        """Helper to create the socket and the stream."""
        socket = self.context.socket(zmq.XREQ)
        ioloop = IOLoop.instance()
        self.stream = ZMQStream(socket, ioloop)
        self.stream.on_recv(self._on_message)
        self.stream.socket.setsockopt(zmq.LINGER, 0)
        self.stream.connect(self.endpoint)
        self.ticker = PeriodicCallback(self._tick, self.HB_INTERVAL)
        self._send_ready()
        self.ticker.start()
        return

    def _send_ready(self):
        """Helper method to prepare and send the workers READY message."""
        # '\x01' == MDP READY command.
        ready_msg = [ b'', self._proto_version, chr(1), self.service ]
        self.stream.send_multipart(ready_msg)
        self.curr_liveness = self.HB_LIVENESS
        return

    def _tick(self):
        """Method called every HB_INTERVAL milliseconds.

        Decrements liveness and reconnects when the broker is considered
        dead (liveness below zero).
        """
        self.curr_liveness -= 1
        ## print '%.3f tick - %d' % (time.time(), self.curr_liveness)
        self.send_hb()
        if self.curr_liveness >= 0:
            return
        print '%.3f lost connection' % time.time()
        # ouch, connection seems to be dead
        self.shutdown()
        # try to recreate it
        self._delayed_cb = DelayedCallback(self._create_stream, 5000)
        self._delayed_cb.start()
        return

    def send_hb(self):
        """Construct and send HB message to broker."""
        # '\x04' == MDP HEARTBEAT command.
        msg = [ b'', self._proto_version, chr(4) ]
        self.stream.send_multipart(msg)
        return

    def shutdown(self):
        """Method to deactivate the worker connection completely.

        Will delete the stream and the underlying socket.
        """
        if self.ticker:
            self.ticker.stop()
            self.ticker = None
        if not self.stream:
            return
        self.stream.socket.close()
        self.stream.close()
        self.stream = None
        self.timed_out = False
        self.need_handshake = True
        self.connected = False
        return

    def reply(self, msg):
        """Send the given message.

        msg can either be a byte-string or a list of byte-strings.
        Uses (and consumes) the envelope saved by the last request.
        """
        ## if self.need_handshake:
        ##     raise ConnectionNotReadyError()
        # prepare full message
        to_send = self.envelope
        self.envelope = None
        if isinstance(msg, list):
            to_send.extend(msg)
        else:
            to_send.append(msg)
        self.stream.send_multipart(to_send)
        return

    def _on_message(self, msg):
        """Helper method called on message receive.

        msg is a list w/ the message parts
        """
        # 1st part is empty
        msg.pop(0)
        # 2nd part is protocol version
        # TODO: version check
        proto = msg.pop(0)
        # 3nd part is message type
        msg_type = msg.pop(0)
        # XXX: hardcoded message types!
        # any message resets the liveness counter
        self.need_handshake = False
        self.curr_liveness = self.HB_LIVENESS
        if msg_type == '\x05':  # disconnect
            print ' DISC'
            self.curr_liveness = 0  # reconnect will be triggered by hb timer
        elif msg_type == '\x02':  # request
            # remaining parts are the user message
            envelope, msg = split_address(msg)
            envelope.append(b'')
            envelope = [ b'', self._proto_version, '\x03'] + envelope  # REPLY
            self.envelope = envelope
            self.on_request(msg)
        else:
            # invalid message
            # ignored
            pass
        return

    def on_request(self, msg):
        """Public method called when a request arrived.

        Must be overloaded!
        """
        pass
class Master(object):
    """Crawl master: distributes requests to workers and collects results.

    Receives processed data from workers on a SUB socket, pushes new
    requests to workers on a PUSH socket, and exchanges control messages
    through a ServerMessenger.

    :param frontier: request source/sink; must provide ``get_next_request()``
        and ``reload_request()`` -- project type, not visible here.
    :param data_in_sock: endpoint workers publish processed data to (SUB).
    :param data_out_sock: endpoint requests are pushed out on (PUSH).
    :param msg_in_sock: inbound control-message endpoint (ServerMessenger).
    :param msg_out_sock: outbound control-message endpoint (ServerMessenger).
    :param io_loop: optional IOLoop; defaults to ``IOLoop.instance()``.
    """

    def __init__(self, frontier,
                 data_in_sock='ipc:///tmp/robot-data-w2m.sock',
                 data_out_sock='ipc:///tmp/robot-data-m2w.sock',
                 msg_in_sock='ipc:///tmp/robot-msg-w2m.sock',
                 msg_out_sock='ipc:///tmp/robot-msg-m2w.sock',
                 io_loop=None):
        # Identity tag, unique per host/process, embedded in outgoing messages.
        self.identity = 'master:%s:%s' % (socket.gethostname(), os.getpid())
        context = zmq.Context()
        self._io_loop = io_loop or IOLoop.instance()
        # SUB socket: receive processed data from workers (subscribe to all).
        self._in_socket = context.socket(zmq.SUB)
        self._in_socket.setsockopt(zmq.SUBSCRIBE, '')
        self._in_socket.bind(data_in_sock)
        self._in_stream = ZMQStream(self._in_socket, io_loop)
        # PUSH socket: fan requests out to workers.
        self._out_socket = context.socket(zmq.PUSH)
        self._out_socket.bind(data_out_sock)
        self._out_stream = ZMQStream(self._out_socket, io_loop)
        self._online_workers = set()
        self._running = False
        # Periodic pumps: try to send work every 100 ms, reload every 1 s.
        self._updater = PeriodicCallback(self._send_next, 100, io_loop=io_loop)
        self._reloader = PeriodicCallback(self.reload, 1000, io_loop=io_loop)
        self.frontier = frontier
        self.messenger = ServerMessenger(msg_in_sock, msg_out_sock,
                                         context, io_loop)

    def start(self):
        """Register callbacks, start timers, and mark the master running."""
        logging.info('[%s] starting', self.identity)
        self.messenger.add_callback(CTRL_MSG_WORKER, self._on_worker_msg)
        self.messenger.start()
        self._in_stream.on_recv(self._on_receive_processed)
        self._updater.start()
        self._reloader.start()
        self._running = True

    def stop(self):
        """Stop timers and the messenger; sockets stay open (see close())."""
        self._running = False
        self._reloader.stop()
        self._updater.stop()
        self.messenger.stop()
        # self.messenger.publish(CTRL_MSG_WORKER, self.identity,
        #                        CTRL_MSG_WORKER_QUIT)

    def close(self):
        """Close all streams and sockets."""
        self._in_stream.close()
        self._in_socket.close()
        self._out_stream.close()
        self._out_socket.close()
        self.messenger.close()

    def reload(self):
        # Periodic hook (1 s); intentionally a no-op in this implementation.
        pass

    def _on_worker_msg(self, msg):
        """Control-message callback: track workers coming online."""
        if msg.data == CTRL_MSG_WORKER_ONLINE:
            self._online_workers.add(msg.identity)
            logging.info('[%s] append [%s]', self.identity, msg.identity)
            self._send_next()
        # if msg.data == CTRL_MSG_WORKER_QUIT_ACK:
        #     if msg.identity in self._online_workers:
        #         self._online_workers.remove(msg.identity)

    def _send_next(self):
        """Top up the outgoing queue with requests from the frontier.

        Heuristic: keep ~4 pending requests per online worker queued.
        NOTE(review): peeks at ZMQStream's private ``_send_queue`` -- relies
        on pyzmq internals.
        """
        if not self._running:
            return
        worker_num = len(self._online_workers)
        if self._running and worker_num > 0:
            while self._out_stream._send_queue.qsize() < worker_num * 4:
                request = self.frontier.get_next_request()
                if not request:
                    break
                msg = RequestMessage(self.identity, request)
                self._out_stream.send_multipart(msg.serialize())
                logging.debug('[%s] send request(%s)',
                              self.identity, request.url)
                self.frontier.reload_request(request)

    def _on_receive_processed(self, zmq_msg):
        """SUB-stream callback: a worker finished a request; send more work."""
        msg = ResponseMessage.deserialize(zmq_msg)
        request = msg.response.request
        logging.debug('[%s] receive response(%s)', self.identity, request.url)
        self._send_next()
class LogWatch_HandleStats(PgLogForwardPlugin):
    """Aggregate per-client call statistics from PostgreSQL log entries
    and periodically push them to a stats queue.
    """

    LOG_FORMATS = ['netstr']

    def init(self, log_fmt):
        """Read configuration and start the periodic stat-dump timer.

        :param log_fmt: log format name, passed to the base plugin.
        """
        super(LogWatch_HandleStats, self).init(log_fmt)
        # depends on pg_settings.log_function_calls
        self.parse_statements = self.cf.getbool('parse_statements', True)
        self.msg_suffix = self.cf.get('msg-suffix', 'confdb')
        if self.msg_suffix and not is_msg_req_valid(self.msg_suffix):
            self.log.error("invalid msg-suffix: %s", self.msg_suffix)
            self.msg_suffix = None
        self.hostname = socket.gethostname()
        self.stat_queue_name = self.cf.get('stat_queue_name', '')
        # BUG FIX: was cf.get(), which returns a *string* when the option is
        # present in the config file; the value is compared numerically
        # against len(self.client_stats) in _update_stats(), so read it as
        # an integer like the other numeric options (cf. stat_interval).
        self.max_stat_items = self.cf.getint('max_stat_items', 10000)
        self.stat_dump_interval = self.cf.getint('stat_interval', 3600)
        self.last_stat_dump = time.time()
        # (database, username, remotehost, action) -> ClientStats
        self.client_stats = {}
        self.timer = PeriodicCallback(self.save_stats,
                                      self.stat_dump_interval * 1000)
        self.timer.start()

    def process_netstr(self, data):
        """ Process contents of collected log chunk.  This might be a SQL
        statement or a connect/disconnect entry.
        """
        if not self.stat_queue_name:
            return
        if data['remotehost'] == "[local]":
            data['remotehost'] = "127.0.0.1"

        action = None
        action_duration = 0
        statement_duration = 0
        call_count = 0

        if data['message'].startswith("connection authorized:"):
            action = "connect"
        elif data['message'].startswith("disconnection"):
            action = "disconnect"
            m = rc_disconnect.match(data['message'])
            if m:
                # session length "hh:mm:ss.fff" converted to milliseconds
                action_duration = (int(m.group('hours')) * 3600 +
                                   int(m.group('minutes')) * 60 +
                                   float(m.group('seconds'))) * 1000
        elif not self.parse_statements:
            # we have function logging enabled, see if we can use it
            m = rc_logged_func.search(data['message'])
            if m:
                # a logged function call, definitely prefer this to parsing
                action = m.group('func_name')
                action_duration = float(m.group('time')) / 1000
                call_count = int(m.group('calls'))

        if not action:
            # we have to parse function call
            m = rc_sql.search(data['message'])
            if m:
                if self.parse_statements:
                    # attempt to parse the function name and parameters
                    #action = self.get_sql_action (m.group('sql'))
                    call_count = 1
                # count the overall statement duration
                action_duration = float(m.group('duration'))
                statement_duration = action_duration

        self._update_stats(data, action, action_duration, call_count)
        self._update_stats(data, "SQL statements", statement_duration,
                           call_count)

    def _update_stats(self, data, action, duration, call_count):
        """Merge one observation into the per-client stats map.

        Does nothing when *action* is falsy; refuses to grow the map
        past max_stat_items.
        """
        if action:
            key = (data['database'], data['username'],
                   data['remotehost'], action)
            cs = self.client_stats.get(key)
            if cs:
                cs.update(duration, call_count)
            elif len(self.client_stats) > self.max_stat_items:
                self.log.error("Max stat items exceeded: %i",
                               self.max_stat_items)
            else:
                cs = ClientStats(data['database'], data['username'],
                                 data['remotehost'], action,
                                 duration, call_count)
                self.client_stats[key] = cs

    def save_stats(self):
        """ Dump client stats to database.  Scheduled to be called
        periodically.
        """
        # do not send stats if stats is missing or stats queue is missing
        if not self.client_stats or not self.stat_queue_name:
            return
        now = time.time()
        time_passed = now - self.last_stat_dump
        self.log.info("Sending usage stats to repository [%i]",
                      len(self.client_stats))
        # post role usage
        usage = []
        for client in self.client_stats.values():
            self.log.trace("client: %s", client)
            usage.append(client.to_dict())
        params = skytools.db_urlencode(
            dict(hostname=self.hostname,
                 sample_length='%d seconds' % time_passed,
                 snap_time=datetime.datetime.now().isoformat()))
        confdb_funcargs = ('username=discovery', params,
                           skytools.make_record_array(usage))
        funcargs = [None, self.stat_queue_name, 'dba.set_role_usage',
                    skytools.db_urlencode(dict(enumerate(confdb_funcargs)))]
        msg = DatabaseMessage(function='pgq.insert_event',
                              params=cc.json.dumps(funcargs))
        if self.msg_suffix:
            msg.req += '.' + self.msg_suffix
        self.main.ccpublish(msg)
        # start a fresh collection window
        self.client_stats = {}
        self.last_stat_dump = now

    def stop(self):
        """Stop the periodic stat-dump timer."""
        self.timer.stop()
class ProxyHandler(BaseProxyHandler):
    """ Simply proxies further """

    log = skytools.getLogger('h:ProxyHandler')

    ping_tick = 1  # seconds between echo pings (when ping is enabled)

    def __init__(self, hname, hcf, ccscript):
        super(ProxyHandler, self).__init__(hname, hcf, ccscript)
        # Optional liveness monitoring of the remote CC via echo ping/pong.
        self.ping_remote = self.cf.getbool("ping", False)
        if self.ping_remote:
            self.echo_stats = EchoState(self.remote_url)
            self.echo_timer = PeriodicCallback(self.ping,
                                               self.ping_tick * 1000,
                                               self.ioloop)
            self.echo_timer.start()
            self.log.debug("will ping %s", self.remote_url)

    def on_recv(self, zmsg):
        """ Got message from remote CC, process it. """
        try:
            # pongs to our pings should come back w/o any routing info
            if self.ping_remote and zmsg[0] == '':
                self.log.trace("%r", zmsg)
                cmsg = CCMessage(zmsg)
                req = cmsg.get_dest()
                if req == "echo.response":
                    self._recv_pong(cmsg)
                else:
                    self.log.warn("unknown msg: %s", req)
        except:
            self.log.exception("crashed")
        finally:
            # always forward through the base proxy behaviour
            super(ProxyHandler, self).on_recv(zmsg)

    def _recv_pong(self, cmsg):
        """ Pong received, evaluate it. """
        msg = cmsg.get_payload(self.xtx)
        if not msg:
            return
        if msg.orig_target != self.remote_url:
            # pong for someone else's ping
            self.log.warn("unknown pong: %s", msg.orig_target)
            return
        echo = self.echo_stats
        echo.update_pong(msg)

        # round-trip time of this pong
        rtt = echo.time_pong - msg.orig_time
        if msg.orig_time == echo.time_ping:
            # answer to the most recent ping -- connection healthy
            self.log.trace("echo time: %f s (%s)", rtt, self.remote_url)
        elif rtt <= 5 * self.ping_tick:
            self.log.debug("late pong: %f s (%s)", rtt, self.remote_url)
        else:
            self.log.info("too late pong: %f s (%s)", rtt, self.remote_url)

    def _send_ping(self):
        """ Send ping to remote CC. """
        msg = EchoRequestMessage(target=self.remote_url)
        cmsg = self.xtx.create_cmsg(msg)
        self.stream.send_cmsg(cmsg)
        self.echo_stats.update_ping(msg)
        self.log.trace("%r", msg)

    def ping(self):
        """ Echo requesting and monitoring.

        Warns when no pong has arrived for 5 ping intervals, then sends
        the next ping.
        """
        self.log.trace("")
        echo = self.echo_stats
        if echo.time_ping - echo.time_pong > 5 * self.ping_tick:
            self.log.warn("no pong from %s for %f s", self.remote_url,
                          echo.time_ping - echo.time_pong)
        self._send_ping()

    def stop(self):
        super(ProxyHandler, self).stop()
        self.log.info("stopping")
        # echo_timer only exists when ping was enabled in config
        if hasattr(self, "echo_timer"):
            self.echo_timer.stop()
class MNWorker(MN_object):
    """Class for the MN worker side.

    Thin encapsulation of a zmq.DEALER socket.
    Provides a send method with optional timeout parameter.

    Will use a timeout to indicate a broker failure.

    :param context: the context to use for socket creation.
    :type context: zmq.Context
    :param endpoint: endpoint to connect to.
    :type endpoint: str
    :param service: the name of the service we support.
    :type service: byte-string
    """

    _proto_version = b'MNPW01'  # worker protocol version

    def __init__(self, context, endpoint, service, worker_type, address,
                 protocols):
        """Initialize the MNWorker.
        """
        self.context = context
        self.endpoint = endpoint
        self.service = service
        self.type = worker_type
        self.address = address
        self.protocols = protocols
        # Envelope of the request currently being serviced (set in
        # _on_message, consumed by reply()).
        self.envelope = None
        self.HB_RETRIES = HB_RETRIES
        self.HB_INTERVAL = HB_INTERVAL
        self._data = {}
        self.stream = None
        self._tmo = None
        self.timed_out = False
        self.need_handshake = True
        self.connected = False
        self.ticker = None
        self._delayed_cb = None
        self._create_stream()
        _LOG.info("Worker initialized and can be found at '%s'" % endpoint)
        return

    def _create_stream(self):
        """Helper to create the socket and the stream.
        """
        socket = self.context.socket(zmq.DEALER)
        ioloop = IOLoop.instance()
        self.stream = ZMQStream(socket, ioloop)
        self.stream.on_recv(self._on_message)
        self.stream.socket.setsockopt(zmq.LINGER, 0)
        self.stream.connect(self.endpoint)
        self.ticker = PeriodicCallback(self._tick, self.HB_INTERVAL)
        self._send_ready()
        self.ticker.start()
        return

    def _send_ready(self):
        """Helper method to prepare and send the workers READY message.
        """
        _LOG.debug("Informing broker I am ready")
        ready_msg = [b'', WORKER_PROTO, MSG_READY, self.service,
                     self.type, self.address, self.protocols]
        if self.stream.closed():
            # BUG FIX: previously fell through and called send_multipart()
            # on the stream that shutdown() just set to None, raising
            # AttributeError; bail out after tearing down.
            self.shutdown()
            return
        self.stream.send_multipart(ready_msg)
        self.curr_retries = self.HB_RETRIES
        return

    def _tick(self):
        """Method called every HB_INTERVAL milliseconds.

        Decrements the retry counter; once exhausted, tears down the
        connection and schedules a delayed reconnect.
        """
        self.curr_retries -= 1
        self.send_hb()
        if self.curr_retries >= 0:
            return
        # connection seems to be dead
        self.shutdown()
        # try to recreate it
        # self._delayed_cb = IOLoop.call_later(self._create_stream, 5000)
        # self._delayed_cb = IOLoop.add_timeout(self._create_stream, 5000)
        self._delayed_cb = DelayedCallback(self._create_stream,
                                           self.HB_INTERVAL)
        self._delayed_cb.start()
        return

    def send_hb(self):
        """Construct and send HB message to broker.
        """
        _LOG.debug("Sending heartbeat")
        msg = [b'', WORKER_PROTO, MSG_HEARTBEAT]
        if self.stream.closed():
            # BUG FIX: see _send_ready -- don't send on a torn-down stream.
            self.shutdown()
            return
        self.stream.send_multipart(msg)
        return

    def shutdown(self):
        """Method to deactivate the worker connection completely.

        Will delete the stream and the underlying socket.
        """
        if self.ticker:
            self.ticker.stop()
            self.ticker = None
        if not self.stream:
            return
        self.stream.socket.close()
        self.stream.close()
        self.stream = None
        self.timed_out = False
        self.need_handshake = True
        self.connected = False
        return

    def reply(self, msg):
        """Send the given message.

        :param msg: full message to send.
        :type msg: can either be a byte-string or a list of byte-strings
        """
        if self.need_handshake:
            raise ConnectionNotReadyError()
        to_send = self.envelope
        self.envelope = None
        if isinstance(msg, list):
            to_send.extend(msg)
        else:
            to_send.append(msg)
        if self.stream.closed():
            # BUG FIX: see _send_ready -- don't send on a torn-down stream.
            self.shutdown()
            return
        self.stream.send_multipart(to_send)
        return

    def _on_message(self, msg):
        """Helper method called on message receive.

        :param msg: a list w/ the message parts
        :type msg: a list of byte-strings
        """
        _LOG.debug("Received: %s." % msg)
        # 1st part is empty
        msg.pop(0)
        # 2nd part is protocol version
        proto = msg.pop(0)
        if proto != WORKER_PROTO:
            # BUG FIX: was `pass`, which fell through and processed the
            # message from an unsupported protocol anyway; actually
            # ignore it, as the original comment intended.
            return
        # 3rd part is message type
        msg_type = msg.pop(0)
        # XXX: hardcoded message types!
        # any message resets the retries counter
        self.need_handshake = False
        self.curr_retries = self.HB_RETRIES
        if msg_type == MSG_DISCONNECT:
            # disconnect
            _LOG.info("Broker wants us to disconnect.")
            self.curr_retries = 0  # reconnect will be triggered by hb timer
        elif msg_type == MSG_QUERY:
            # request
            # remaining parts are the user message
            _LOG.debug("Received new request: %s." % msg)
            envelope, msg = split_address(msg)
            envelope.append(b'')
            envelope = [b'', WORKER_PROTO, MSG_REPLY] + envelope  # reply
            self.envelope = envelope
            self.on_request(msg)
        else:
            # invalid message
            # ignored
            _LOG.debug('ignoring message with invalid id')
            pass
        return

    def on_request(self, msg):
        """Public method called when a request arrived.

        :param msg: a list w/ the message parts
        :type msg: a list of byte-strings

        Must be overloaded to provide support for various services!
        """
        pass
class TaskRouter(CCHandler):
    """Keep track of host routes.

    Clean old ones.
    """

    log = skytools.getLogger("h:TaskRouter")

    CC_ROLES = ["remote"]

    def __init__(self, *args):
        super(TaskRouter, self).__init__(*args)
        # host -> HostRoute (ZMQ route to the TaskRunner on that host)
        self.route_map = {}
        # task uid -> ReplyRoute (ZMQ route back to the requesting client)
        self.reply_map = {}

        # 1 hr?  XXX
        self.route_lifetime = self.cf.getint("route-lifetime", 1 * 60 * 60)
        self.reply_timeout = self.cf.getint("reply-timeout", 5 * 60)
        self.maint_period = self.cf.getint("maint-period", 1 * 60)

        self.timer = PeriodicCallback(self.do_maint,
                                      self.maint_period * 1000,
                                      self.ioloop)
        self.timer.start()

    def handle_msg(self, cmsg):
        """ Got task from client or reply from TaskRunner / CCTask.

        Dispatch task request to registered TaskRunner.
        Dispatch task reply to requestor (client).
        """
        self.log.trace("got message: %r", cmsg)
        req = cmsg.get_dest()
        sreq = req.split(".")

        if req == "task.register":
            self.register_host(cmsg)
        elif sreq[:2] == ["task", "send"]:
            self.send_host(cmsg)
        elif sreq[:2] == ["task", "reply"]:
            self.send_reply(cmsg)
        else:
            self.log.warning("unknown msg: %s", req)

    def do_maint(self):
        """Drop old routes"""
        self.log.debug("cleanup")
        now = time.time()
        # collect zombies first, then delete -- avoids mutating the
        # dicts while iterating over them
        zombies = []
        for hr in self.route_map.itervalues():
            if now - hr.create_time > self.route_lifetime:
                zombies.append(hr)
        for hr in zombies:
            self.log.info("deleting route for %s", hr.host)
            del self.route_map[hr.host]
            self.stat_inc("dropped_routes")

        zombies = []
        for rr in self.reply_map.itervalues():
            if now - rr.atime > self.reply_timeout:
                zombies.append(rr)
        for rr in zombies:
            self.log.info("deleting reply route for %s", rr.uid)
            del self.reply_map[rr.uid]
            self.stat_inc("dropped_tasks")

    def register_host(self, cmsg):
        """Remember ZMQ route for host"""
        route = cmsg.get_route()
        msg = cmsg.get_payload(self.xtx)
        if not msg:
            return
        host = msg.host
        self.log.info("Got registration for %s", host)
        hr = HostRoute(host, route)
        self.route_map[hr.host] = hr
        self.stat_inc("task.register")
        # FIXME: proper reply?
        # zans = route + [''] + ['OK']
        # self.cclocal.send_multipart(zans)

    def send_host(self, cmsg):
        """Send message for task executor on host"""
        msg = cmsg.get_payload(self.xtx)
        host = msg.task_host
        if host not in self.route_map:
            self.ccerror(cmsg, "cannot route to %s" % host)
            return

        inr = cmsg.get_route()      # route from/to client
        hr = self.route_map[host]   # find ZMQ route to host
        cmsg.set_route(hr.route)    # re-construct message

        # send the message
        self.log.debug("sending task to %s", host)
        cmsg.send_to(self.cclocal)
        self.stat_inc("task.send")

        # remember ZMQ route for replies
        req = cmsg.get_dest()
        uid = req.split(".")[2]
        rr = ReplyRoute(uid, inr)
        self.reply_map[uid] = rr

        # send ack to client
        # NOTE(review): msg is indexed with [] here but accessed with
        # attributes above (msg.task_host) -- confirm the payload type
        # supports both styles.
        rep = TaskReplyMessage(
            req="task.reply.%s" % uid,
            handler=msg["task_handler"],
            task_id=msg["task_id"],
            status="forwarded")
        rcm = self.xtx.create_cmsg(rep)
        rcm.set_route(inr)
        rcm.send_to(self.cclocal)
        self.log.debug("saved client for %r", uid)

    def send_reply(self, cmsg):
        """ Send reply message back to task requestor """
        req = cmsg.get_dest()
        uid = req.split(".")[2]
        if uid not in self.reply_map:
            self.log.info("cannot route back: %s", req)
            return
        self.log.debug("req: %s", req)
        rr = self.reply_map[uid]   # find ZMQ route
        cmsg.set_route(rr.route)   # re-route message
        cmsg.send_to(self.cclocal)
        rr.atime = time.time()     # update feedback time
        self.stat_inc("task.reply")

    def ccreply(self, rep, creq):
        # Wrap payload in a cmsg routed back to the requestor.
        crep = self.xtx.create_cmsg(rep)
        crep.take_route(creq)
        crep.send_to(self.cclocal)

    def ccerror(self, cmsg, errmsg):
        # Log and send an ErrorMessage back to the requestor.
        self.log.info(errmsg)
        rep = ErrorMessage(msg=errmsg)
        self.ccreply(rep, cmsg)

    def stop(self):
        super(TaskRouter, self).stop()
        self.log.info("stopping")
        self.timer.stop()
class GlinApp:
    """Main Class for Management.

    Owns the IOLoop, the hardware backend, the ZMQ collector/publisher pair
    and all scene/animation state.  Most mutating methods return a
    ``(success, sequence_number, message)`` tuple, where ``sequence_number``
    is the publisher's sequence id of the broadcast state change (0 on
    failure).
    """
    def __init__(self, led_count, hw_backend, port=6606):
        # :param led_count: number of LEDs in the strip
        # :param hw_backend: hardware communication backend (must provide
        #     connect/disconnect/send/switch_on/switch_off/get_max_fps)
        # :param port: service port -- stored only; usage not visible here
        self.ctx = zmq.Context()
        self.led_count = led_count
        self.port = port

        self.loop = IOLoop.instance()
        # frame timer; callback_time is retuned per scene in _activate_scene
        self.caller = PeriodicCallback(self._on_next_frame, 1000/30)
        self.hw_communication = hw_backend
        self.hw_communication.connect()
        self.zmq_collector = GlinAppZmqCollector(self, self.ctx)
        self.zmq_publisher = GlinAppZmqPublisher(self, self.ctx)
        # server side configuration
        self.config = SimpleNamespace()
        self.config.max_fps = 60
        # current state (somehow client side configuration)
        self.state = SimpleNamespace()
        self.state.animationClasses = []
        self.state.activeSceneId = None
        self.state.activeAnimation = None
        self.state.scenes = {}
        self.state.brightness = 1.0
        self.state.sceneIdCtr = 0       # counter for scene IDs
        self.state.mainswitch = True
        self.state.target_fps = 0
        self.state.lastFrameSent = None

    def set_brightness(self, brightness):
        """set general brightness in range 0...1"""
        brightness = min([1.0, max([brightness, 0.0])]) # enforces range 0 ... 1
        self.state.brightness = brightness
        # re-send last frame so the new brightness shows even on slow scenes
        self._repeat_last_frame()
        sequence_number = self.zmq_publisher.publish_brightness(brightness)
        logging.debug("Set brightness to {brightPercent:05.1f}%".format(brightPercent=brightness*100))
        return (True, sequence_number, "OK")

    def register_animation(self, animation_class):
        """Add a new animation. Returns the new animation's ID (list index)."""
        self.state.animationClasses.append(animation_class)
        return len(self.state.animationClasses) - 1

    def add_scene(self, animation_id, name, color, velocity, config):
        """Add a new scene, returns Scene ID"""
        # check arguments
        if animation_id < 0 or animation_id >= len(self.state.animationClasses):
            err_msg = "Requested to register scene with invalid Animation ID. Out of range."
            logging.info(err_msg)
            return(False, 0, err_msg)
        if self.state.animationClasses[animation_id].check_config(config) is False:
            err_msg = "Requested to register scene with invalid configuration."
            logging.info(err_msg)
            return(False, 0, err_msg)
        self.state.sceneIdCtr += 1
        self.state.scenes[self.state.sceneIdCtr] = Scene(animation_id, name, color, velocity, config)
        sequence_number = self.zmq_publisher.publish_scene_add(self.state.sceneIdCtr, animation_id, name, color, velocity, config)
        logging.debug("Registered new scene.")
        # set this scene as active scene if none is configured yet
        if self.state.activeSceneId is None:
            self.set_scene_active(self.state.sceneIdCtr)
        return (True, sequence_number, "OK")

    def remove_scene(self, scene_id):
        """remove a scene by Scene ID (the active scene cannot be removed)"""
        if self.state.activeSceneId == scene_id:
            err_msg = "Requested to delete scene {sceneNum}, which is currently active. Cannot delete active scene.".format(sceneNum=scene_id)
            logging.info(err_msg)
            return(False, 0, err_msg)
        try:
            del self.state.scenes[scene_id]
            logging.debug("Deleted scene {sceneNum}".format(sceneNum=scene_id))
        except KeyError:
            err_msg = "Requested to delete scene {sceneNum}, which does not exist".format(sceneNum=scene_id)
            logging.info(err_msg)
            return(False, 0, err_msg)
        # if we are here, we deleted a scene, so publish it
        sequence_number = self.zmq_publisher.publish_scene_remove(scene_id)
        logging.debug("Removed scene {sceneNum}".format(sceneNum=scene_id))
        return (True, sequence_number, "OK")

    def set_scene_name(self, scene_id, name):
        """rename a scene by scene ID"""
        if not scene_id in self.state.scenes: # does that scene_id exist?
            err_msg = "Requested to rename scene {sceneNum}, which does not exist".format(sceneNum=scene_id)
            logging.info(err_msg)
            return(False, 0, err_msg)
        # Scene is a namedtuple-like record, hence _replace
        self.state.scenes[scene_id] = self.state.scenes[scene_id]._replace(name=name) # TODO: is there a better solution?
        sequence_number = self.zmq_publisher.publish_scene_name(scene_id, name)
        logging.debug("Renamed scene {sceneNum}".format(sceneNum=scene_id))
        return (True, sequence_number, "OK")

    def set_scene_config(self, scene_id, config):
        """reconfigure a scene by scene ID"""
        if not scene_id in self.state.scenes: # does that scene_id exist?
            err_msg = "Requested to reconfigure scene {sceneNum}, which does not exist".format(sceneNum=scene_id)
            logging.info(err_msg)
            return(False, 0, err_msg)
        if scene_id == self.state.activeSceneId:
            pass # TODO: maybe calculate next frame, esp. if static scene
        self.state.scenes[scene_id] = self.state.scenes[scene_id]._replace(config=config)
        sequence_number = self.zmq_publisher.publish_scene_config(scene_id, config)
        logging.debug("Reconfigured scene {sceneNum}".format(sceneNum=scene_id))
        return (True, sequence_number, "OK")

    def set_scene_color(self, scene_id, color):
        """recolor a scene by scene ID"""
        if not scene_id in self.state.scenes: # does that scene_id exist?
            err_msg = "Requested to recolor scene {sceneNum}, which does not exist".format(sceneNum=scene_id)
            logging.info(err_msg)
            return(False, 0, err_msg)
        self.state.scenes[scene_id] = self.state.scenes[scene_id]._replace(color=color)
        sequence_number = self.zmq_publisher.publish_scene_color(scene_id, color)
        logging.debug("Recolored scene {sceneNum}".format(sceneNum=scene_id))
        # apply immediately if this is the running scene
        if scene_id == self.state.activeSceneId:
            self.state.activeAnimation.set_color(color)
            self._do_next_frame() # TODO: make it more sensible, e.g. call only if static scene
        return (True, sequence_number, "OK")

    def set_scene_velocity(self, scene_id, velocity):
        """set the velocity of a scene by scene ID"""
        if not scene_id in self.state.scenes: # does that scene_id exist?
            err_msg = "Requested to set velocity on scene {sceneNum}, which does not exist".format(sceneNum=scene_id)
            logging.info(err_msg)
            return(False, 0, err_msg)
        self.state.scenes[scene_id] = self.state.scenes[scene_id]._replace(velocity=velocity)
        sequence_number = self.zmq_publisher.publish_scene_velocity(scene_id, velocity)
        logging.debug("set velocity on scene {sceneNum}".format(sceneNum=scene_id))
        # apply immediately if this is the running scene
        if scene_id == self.state.activeSceneId:
            self.state.activeAnimation.set_velocity(velocity)
            self._do_next_frame() # TODO: make it more sensible, e.g. call only if static scene
        return (True, sequence_number, "OK")

    def set_scene_active(self, scene_id):
        """sets the active scene by scene ID"""
        if self.state.activeSceneId != scene_id: # do nothing if scene has not changed
            self._deactivate_scene()
            sequence_number = self.zmq_publisher.publish_active_scene(scene_id)
            self.state.activeSceneId = scene_id
            if self.state.mainswitch is True: # activate scene only if we are switched on
                self._activate_scene()
            logging.debug("Set scene {sceneNum} as active scene".format(sceneNum=scene_id))
            return (True, sequence_number, "OK")
        else:
            logging.debug("Scene {sceneNum} already is active scene".format(sceneNum=scene_id))
            return (False, 0, "This already is the activated scene.")

    def set_mainswitch_state(self, state):
        """Turns output on or off. Also turns hardware on or off"""
        if self.state.mainswitch == state:
            err_msg = "MainSwitch unchanged, already is {sState}".format(sState="On" if state else "Off")
            logging.debug(err_msg)
            return (False, 0, err_msg) # because nothing changed
        self.state.mainswitch = state
        sequence_number = self.zmq_publisher.publish_mainswitch_state(state)
        logging.debug("MainSwitch toggled, new state is {sState}".format(sState="On" if state else "Off"))
        if state is True:
            # power up first, then restart the animation
            self.hw_communication.switch_on()
            self._activate_scene() # reinit scene
        else:
            # stop rendering before cutting power
            self._deactivate_scene()
            self.hw_communication.switch_off()
        return (True, sequence_number, "OK")

    def toggle_mainswitch_state(self):
        """Toggles the mainswitch state"""
        return self.set_mainswitch_state(not self.state.mainswitch)

    def _activate_scene(self):
        # Instantiate and start the animation of the active scene; clamps the
        # frame rate to the slowest of app config, animation and hardware.
        if self.state.activeSceneId in self.state.scenes: # is scene_id valid? if not, assume there is no scene configured
            animation_class = self.state.animationClasses[self.state.scenes[self.state.activeSceneId].animation_id]
            self.state.activeAnimation = animation_class()
            target_fps = min(self.config.max_fps, self.state.activeAnimation.get_max_fps(), self.hw_communication.get_max_fps())
            if target_fps < 0:
                target_fps = 0
            self.state.target_fps = target_fps
            logging.debug("Running with {fps} FPS".format(fps=target_fps))
            self.state.activeAnimation.prepare(self.led_count, target_fps)
            self.state.activeAnimation.set_color(self.state.scenes[self.state.activeSceneId].color)
            self.state.activeAnimation.set_velocity(self.state.scenes[self.state.activeSceneId].velocity)
            self.state.activeAnimation.set_config(self.state.scenes[self.state.activeSceneId].config)
            if target_fps > 0: # 0 FPS means one-shot -> no periodic callback required
                self.caller.callback_time = 1000/target_fps
                self.caller.start()
            # execute once to not have to wait for periodic callback (self.caller), esp. if 0 or low FPS
            self.loop.add_callback_from_signal(self._do_next_frame)
        else:
            self.state.activeAnimation = None # don't do anything. stuck with last frame.

    def _deactivate_scene(self):
        # Stop the frame timer and tear down the running animation, if any.
        if not self.state.activeAnimation is None:
            self.caller.stop() # stop rendering new frames
            self.state.activeAnimation.finish()
            self.state.activeAnimation = None

    def _on_next_frame(self):
        # PeriodicCallback entry point.
        logging.debug("generating next frame")
        self._do_next_frame()

    def _do_next_frame(self):
        # Render one frame of the active animation and push it to hardware;
        # keeps a copy in state.last_buf for brightness-triggered resends.
        if self.state.activeAnimation:
            buf = np.zeros((self.led_count, 3))
            self.state.activeAnimation.render_next_frame(buf)
            self.state.last_buf = np.copy(buf)
            self._send_frame(buf)
        else:
            logging.debug("app: No Active Animation")

    def _repeat_last_frame(self):
        # only do something, if there is an active animation, else output is considered to be turned off
        if hasattr(self.state, 'last_buf') and self.state.last_buf is not None and self.state.activeAnimation is not None:
            if self.state.target_fps < self.config.max_fps / 4: # to not overload hwbackend, only resend, if active animation is very slow
                self._send_frame(np.copy(self.state.last_buf))

    def _send_frame(self, buf):
        # Clamp to [0, 1], apply global brightness and ship to the hardware.
        # buf is modified in place; callers pass a copy where that matters.
        np.clip(buf, 0.0, 1.0, out=buf)
        self.state.lastFrameSent = datetime.datetime.now()
        buf *= self.state.brightness
        self.hw_communication.send(buf)

    def execute(self):
        """Execute Main Loop"""
        try:
            logging.debug("Entering IOLoop")
            self.loop.start()
            logging.debug("Leaving IOLoop")
        except KeyboardInterrupt:
            logging.debug("Leaving IOLoop by KeyboardInterrupt")
        finally:
            # always release the hardware, however the loop ended
            self.hw_communication.disconnect()
class DeviceServiceManager(MDPWorker):
    """MDP worker that manages connected devices.

    Tracks live devices in ``_connected_devices`` (device id -> DeviceRep),
    mirrors their connection state into the database, and dispatches
    device-originated requests (heartbeat / create / remove / update /
    control) received via :meth:`on_request`.
    """

    # Kept for backward compatibility with code that may reference the class
    # attribute; each instance gets its own dict in __init__ so state is not
    # accidentally shared between instances (mutable class attribute pitfall).
    _connected_devices = {}

    def __init__(self, context, endpoint, service):
        """Overridden initializer for MDPWorker.

        Marks all devices disconnected in the database (nothing can be
        connected before this manager starts) and prepares the device timer.
        """
        self._connected_devices = {}
        session = Session()
        session.query(Device).update({Device.connected: False})
        session.commit()
        self.device_timer = None
        super(DeviceServiceManager, self).__init__(context, endpoint, service)
        return

    def _create_stream(self):
        """Overridden _create_stream for MDPWorker.

        Starts the periodic device_timer that expires dead devices.
        """
        self.device_timer = PeriodicCallback(self.device_watcher, CLIENT_HB_INTERVAL)
        self.device_timer.start()
        super(DeviceServiceManager, self)._create_stream()
        return

    def shutdown(self):
        """Overridden shutdown for MDPWorker.

        Stops the device_timer before shutting the worker down.
        """
        self.device_timer.stop()
        self.device_timer = None
        super(DeviceServiceManager, self).shutdown()
        return

    def device_watcher(self):
        """Called on every CLIENT_HB_INTERVAL: expire devices that stopped
        sending heartbeats, marking them disconnected in the database.
        """
        # Iterate over a snapshot: entries are deleted from the dict inside
        # the loop, which would raise RuntimeError on a live view in Python 3.
        for device in list(self._connected_devices.values()):
            if not device.is_alive():
                session = Session()
                session.query(Device).filter_by(id=device.id) \
                    .update({Device.connected: False})
                session.commit()
                session.close()  # release the connection (was leaked before)
                device.shutdown()
                del self._connected_devices[device.id]
        return

    def on_heartbeat(self, did):
        """Handle a device heartbeat.

        :param did: device id.
        :returns: the device's current state string, 'connected' for a
            known-but-untracked device, 'unknown' to signal the device to
            send a create message, or None for a tracked-but-dead device.
        """
        if did in self._connected_devices:
            if self._connected_devices[did].is_alive():
                self._connected_devices[did].on_message_received()
                return self._connected_devices[did].get_state()
        else:
            session = Session()
            device = session.query(Device).filter_by(id=did).first()
            if not device:
                session.close()
                # signals the device to send a create message
                return 'unknown'
            else:
                self._connected_devices[did] = DeviceRep(did, state='connected')
                device.connected = True
                session.commit()
                session.close()  # release the connection (was leaked before)
                return 'connected'
        return

    def on_create(self, did, msg):
        """Create a new Device (and its Grainbins) from a pickled payload.

        :param did: device id (unused here; the payload carries its own id).
        :param msg: message frames; msg[0] is the pickled device description.
        """
        # SECURITY NOTE: pickle.loads on data received from the network can
        # execute arbitrary code -- acceptable only with trusted devices.
        imported_device = pickle.loads(msg[0])
        device = Device(imported_device['id'], version=imported_device['version'])
        device.connected = True
        device.user_configured = False
        device.database_service = imported_device['database_service']
        device.device_service = imported_device['device_service']
        device.grainbin_service = imported_device['grainbin_service']
        device.grainbin_count = imported_device['grainbin_count']
        session = Session()
        session.add(device)
        if device.grainbin_service:
            grainbins = []
            for x in range(device.grainbin_count):
                # bin id like "<device>.00", "<device>.01", ...
                # (renamed from `id`, which shadowed the builtin)
                bin_id = device.id + '.' + str(x).zfill(2)
                grainbin = Grainbin(bin_id, device.id, x)
                grainbins.append(grainbin)
            session.add_all(grainbins)
        session.commit()
        session.close()
        return "added"

    def on_remove(self, did, msg):
        """Stop tracking a device. Safe to call for unknown ids."""
        if did in self._connected_devices:
            self._connected_devices[did].shutdown()
            del self._connected_devices[did]
        return "removed"

    def on_update(self, did, msg):
        """Store a pickled sensor update for a configured device.

        :returns: 'updated', 'device_unconfigured', or 'no_device'.
        """
        session = Session()
        device = session.query(Device).filter_by(id=did).first()
        if device:
            if device.user_configured:
                # SECURITY NOTE: pickled payload from the network, see on_create.
                update = pickle.loads(msg[0])
                device.interior_temp = update['interior_temp']
                device.exterior_temp = update['exterior_temp']
                session.commit()
                session.close()
                values = {'interior_temp': update['interior_temp'],
                          'exterior_temp': update['exterior_temp']}
                rrd = RRD(did, 'device')
                print(values)              # debug output
                print(update['timestamp'])  # debug output
                rrd.update(values, timestamp=update['timestamp'])
                return "updated"
            else:
                session.close()
                return 'device_unconfigured'
        else:
            session.close()
            return 'no_device'

    def on_control(self, did, msg):
        """Handle a control request.

        :param msg: frames; first is the command, rest are its arguments.
            NOTE(review): commands are compared as str -- confirm the
            transport delivers str, not bytes.
        """
        control_command = msg.pop(0)
        if control_command == 'remove':
            device = msg.pop(0)
            reply = self.on_remove(device, "")
            return reply
        if control_command == 'refresh':
            print("Command refresh received")
            device = msg.pop(0)
            publisher.send(control_command)
            return 'sent'
        return

    def on_request(self, msg):
        """MDPWorker request entry point: dispatch on the command frame and
        reply with [command, result]. Unknown commands get a '505' reply.
        """
        device_id = msg.pop(0)
        command = msg.pop(0)
        if command == 'heartbeat':
            return_msg = self.on_heartbeat(device_id)
            reply = ['heartbeat', return_msg]
            self.reply(reply)
            return
        elif command == 'create':
            value = self.on_create(device_id, msg)
            reply = ['create', value]
            self.reply(reply)
            return
        elif command == 'remove':
            value = self.on_remove(device_id, msg)
            reply = ['remove', value]
            self.reply(reply)
            return
        elif command == 'update':
            value = self.on_update(device_id, msg)
            reply = ['update', value]
            self.reply(reply)
        elif command == 'control':
            value = self.on_control(device_id, msg)
            reply = ['control', value]
            self.reply(reply)
        else:
            print(msg)
            reply = [command, '505']
            self.reply(reply)
        return
class Echo(CCHandler):
    """ Echo handler / sender / monitor.

    Serves ``echo.request`` pings from peers and, for every URL listed in
    the ``ping-remotes`` config option, periodically pings that remote CC
    and tracks ping/pong times in an EchoState per peer.
    """

    CC_ROLES = ['local', 'remote']

    log = skytools.getLogger('h:Echo')

    ping_tick = 1    # seconds between ping rounds
    zmq_hwm = 1      # keep at most one queued message per peer
    zmq_linger = 0   # drop pending messages immediately on close

    def __init__(self, hname, hcf, ccscript):
        super(Echo, self).__init__(hname, hcf, ccscript)

        self.echoes = {}                # echo stats for monitored peers
        self.stream = {}                # connections to monitored peers

        for url in self.cf.getlist("ping-remotes", ""):
            sock = self._make_socket(url)
            self.stream[url] = CCStream(sock, ccscript.ioloop, qmaxsize=self.zmq_hwm)
            self.stream[url].on_recv(self.on_recv)
            self.echoes[url] = EchoState(url)
            self.log.debug("will ping %s", url)

        self.timer = PeriodicCallback(self.ping, self.ping_tick * 1000, self.ioloop)
        self.timer.start()

    def _make_socket(self, url):
        """ Create socket for pinging remote CC.
        """
        sock = self.zctx.socket(zmq.XREQ)
        # old pyzmq exposes HWM as a sockopt, new pyzmq via set_hwm()
        try:
            sock.setsockopt(zmq.HWM, self.zmq_hwm)
        except AttributeError:
            sock.set_hwm(self.zmq_hwm)
        sock.setsockopt(zmq.LINGER, self.zmq_linger)
        sock.connect(url)
        return sock

    def on_recv(self, zmsg):
        """ Got reply from a remote CC, process it.

        Top-level stream callback: any error is logged and the message
        dropped, so a bad reply cannot kill the IOLoop.
        """
        try:
            self.log.trace("%r", zmsg)
            cmsg = CCMessage(zmsg)
            req = cmsg.get_dest()
            if req == "echo.response":
                self.process_response(cmsg)
            else:
                self.log.warn("unknown msg: %s", req)
        except:
            self.log.exception("crashed, dropping msg")

    def handle_msg(self, cmsg):
        """ Got a message, process it.
        """
        self.log.trace("%r", cmsg)
        req = cmsg.get_dest()
        if req == "echo.request":
            self.process_request(cmsg)
        else:
            self.log.warn("unknown msg: %s", req)

    def process_request(self, cmsg):
        """ Ping received, respond with pong.

        Echoes the requester's hostname/target/time back so the peer can
        match the pong to its ping.
        """
        msg = cmsg.get_payload(self.xtx)
        if not msg:
            return
        rep = EchoResponseMessage(
                orig_hostname = msg['hostname'],
                orig_target = msg['target'],
                orig_time = msg['time'])
        rcm = self.xtx.create_cmsg(rep)
        rcm.take_route(cmsg)
        rcm.send_to(self.cclocal)

    def process_response(self, cmsg):
        """ Pong received, evaluate it.

        Classifies the pong by round-trip time: on-time (matches the last
        ping sent), late (within 5 ticks) or too late.
        """
        msg = cmsg.get_payload(self.xtx)
        if not msg:
            return
        url = msg.orig_target
        if url not in self.echoes:
            self.log.warn("unknown pong: %s", url)
            return
        echo = self.echoes[url]
        echo.update_pong(msg)

        rtt = echo.time_pong - msg.orig_time
        if msg.orig_time == echo.time_ping:
            self.log.trace("echo time: %f s (%s)", rtt, url)
        elif rtt <= 5 * self.ping_tick:
            self.log.debug("late pong: %f s (%s)", rtt, url)
        else:
            self.log.info("too late pong: %f s (%s)", rtt, url)

    def send_request(self, url):
        """ Send ping to remote CC.
        """
        msg = EchoRequestMessage(target = url)
        cmsg = self.xtx.create_cmsg(msg)
        self.stream[url].send_cmsg(cmsg)
        self.echoes[url].update_ping(msg)
        self.log.trace("%r", msg)

    def ping(self):
        """ Echo requesting and monitoring.

        Runs every ping_tick: warns about peers whose last pong is more
        than 5 ticks behind their last ping, then pings every peer again.
        """
        self.log.trace("")
        for url in self.stream:
            echo = self.echoes[url]
            if echo.time_ping - echo.time_pong > 5 * self.ping_tick:
                self.log.warn("no pong from %s for %f s", url, echo.time_ping - echo.time_pong)
            self.send_request(url)

    def stop(self):
        """Stop handler: shut down base class, then the ping timer."""
        super(Echo, self).stop()
        self.log.info("stopping")
        self.timer.stop()
class WorkerRep(object):
    """Broker-side representation of an attached worker.

    Tracks the worker's liveness and owns the periodic timer that emits
    outgoing heartbeats.  Heartbeating can be temporarily suspended
    ("locked") for a bounded time so that a pending client request is not
    disturbed; the lock auto-expires after its timeout.

    :param proto: the worker protocol id.
    :type proto: str
    :param wid: the worker id.
    :type wid: str
    :param service: service this worker serves.
    :type service: str
    :param stream: the ZMQStream used to send messages.
    :type stream: ZMQStream
    """

    def __init__(self, proto, wid, service, stream):
        self.log = logger.Logger('mq_broker').get_logger()
        self.proto = proto
        self.id = wid
        self.service = service
        self.stream = stream
        self.curr_liveness = HB_LIVENESS
        self.last_hb = 0
        # lock bookkeeping: 0/0 means "not locked"
        self._startLockHB = 0
        self._timeOut = 0
        self.hb_out_timer = PeriodicCallback(self.send_hb, HB_INTERVAL)
        self.hb_out_timer.start()
        return

    def lockhbeat(self, timeOut):
        """Suspend heartbeating for up to *timeOut* seconds.

        :param timeOut: lock duration in seconds.
        """
        self._startLockHB = time.time()
        self._timeOut = timeOut
        self.log.debug("WorkerRep {0} HBeat Locked for {1} s...".format(self.service, timeOut))

    def unlockhbeat(self):
        """Resume heartbeating immediately."""
        self._startLockHB = 0
        self._timeOut = 0
        self.log.debug("WorkerRep {0} HBeat Unlocked".format(self.service))

    def get_lockhbeat(self):
        """Return True while the heartbeat lock is held."""
        return bool(self._startLockHB)

    def send_hb(self):
        """Timer callback, fired every HB_INTERVAL.

        Skips the cycle entirely while the heartbeat lock is active;
        otherwise decrements liveness by one and sends a heartbeat frame
        to the worker.  An expired lock is released first, with a warning.
        """
        if self.get_lockhbeat():
            deadline = self._startLockHB + self._timeOut
            if time.time() < deadline:
                return  # still locked: neither count down nor send
            self.log.warning("WorkerRep {0} HBeat lock timeout {1} s".format(self.service, self._timeOut))
            self.log.debug(u"{0}, {1}".format(deadline, time.time()))
            self.unlockhbeat()
        self.curr_liveness -= 1
        self.stream.send_multipart([self.id, b'', self.proto, b'\x04'])
        return

    def on_heartbeat(self):
        """Reset liveness to HB_LIVENESS on a heartbeat from the worker."""
        self.curr_liveness = HB_LIVENESS
        return

    def is_alive(self):
        """Return True while the worker has not missed too many heartbeats."""
        return self.curr_liveness > 0

    def shutdown(self):
        """Release the heartbeat timer and the stream reference."""
        self.hb_out_timer.stop()
        self.hb_out_timer = None
        self.stream = None
        return