def sender():
    """
    Pops (address, key, value) items from "outbox" and ships each over a
    per-address socket, creating and caching connections lazily.
    """
    address = key = None
    while 1:
        try:
            address, key, value = outbox.get()
            conn = clients.get(address)
            if not conn:
                conn = socket.socket()
                conn.connect(address)
                clients[address] = conn
            try:
                # Length-prefixed key then length-prefixed value.
                for chunk in (struct.pack(SIZE_FORMAT, len(key)), key,
                              struct.pack(SIZE_FORMAT, len(value)), value):
                    conn.sendall(chunk)
            except Exception:
                del clients[address]  # Reconnect next time.
                raise
        except Exception as e:
            if not is_disconnect(e):
                crit(also=dict(address=address, key=key))
def send_to_worker(worker, func_name, request, args=()):
    """
    Send command to other worker or execute command locally.

    @param worker: int
    @param func_name: str
    @param request: dict - defined in "on_request"
    @param args: tuple(str)
    """
    if log.level == logging.DEBUG or config['grep']:
        verbose('w{} > w{}: {}{} for request={}'.format(
            state.worker, worker, func_name, args, request))

    if worker == state.worker:
        execute(func_name, request, args)
        return

    wall = gbn('send_to_worker.other')
    # "command protocol" encodes 40x faster than default "pickle.dumps" and produces 9x smaller result:
    parts = (
        func_name,
        request['id'],
        request.get('client', '-'),
        str(request.get('worker', -1)),
        str(int(request.get('confirm', False))),
    ) + args
    command = '\t'.join(parts)
    if len(command) >= config['warn_command_bytes']:
        crit('WARNING! Too big: "{}" == {} >= {} bytes'.format(
            command, len(command), config['warn_command_bytes']))
    state.commands_to_workers[worker].put(command)
    state.commands_put += 1
    gbn(wall=wall)
def commands_sender(worker):
    """
    Sends queued commands to other worker's socket from single greenlet.

    @param worker: int
    """
    commands = state.commands_to_workers[worker]
    while 1:
        try:
            # Peek (not get) so the command stays queued if sending fails below.
            command = commands.peek()
            sock = state.socks_by_workers.get(worker)
            if sock is None:
                # No live socket yet - wait and re-check after reconnect.
                time.sleep(config['block_seconds'])
                continue
            wall = gbn('commands_sender')
            try:
                sock.sendall(command + '\n')
            except Exception:
                gbn(wall=wall)
                on_worker_disconnected(worker)
                continue
            commands.get()  # Safe to delete command from "commands" queue.
            gbn(wall=wall)
            time.sleep(0)  # Yield to other greenlets between commands.
        except Exception:
            crit(also='w{}: w{}'.format(state.worker, worker))
def disconnect():
    """
    Disconnect from all workers: stop auto-reconnect, kill service greenlets,
    close sockets, fire "on_disconnect" callbacks and reset client state.
    """
    try:
        state['auto_reconnect'] = False
        for worker, sock in state['socks'].items():
            for greenlet_name in 'pingers', 'receivers', 'senders':
                # "pop" with default: a partially connected worker may be missing
                # some greenlets - a bare "del state[greenlet_name][worker]" would
                # raise KeyError and abort cleanup of the remaining workers.
                greenlet = state[greenlet_name].pop(worker, None)
                if greenlet is not None:
                    greenlet.kill()
            sock.close()
        for on_disconnect in state['on_disconnect'].values():
            try:
                on_disconnect()
            except Exception:
                crit()
        init_state()
    except Exception:
        crit()
def connect(worker=None, old_sock=None):
    """
    Connect to MQKS worker or prepare for auto-connect to multiple workers of queues and events on demand.

    @param worker: int|None
    @param old_sock: gevent._socket2.socket|None - For atomic CAS.
    """
    global WORKERS
    WORKERS = len(config['workers'])
    state['auto_reconnect'] = True
    if worker is None:
        return
    # Retry until either this greenlet installs a fresh socket for "worker",
    # or it detects that another greenlet already replaced "old_sock".
    while state['socks'].get(worker) is old_sock:
        try:
            config['_log'] = logging.getLogger(config['logger_name'])
            config['_log'].info('connecting to w{}'.format(worker))
            host, _, port = config['workers'][worker].split(':')
            port = int(port)
            sock = socket.socket()
            sock.connect((host, port))
            if state['socks'].get(worker) is old_sock:  # Greenlet-atomic CAS.
                state['socks'][worker] = sock
            else:
                # Lost the race: somebody else connected while we were blocked.
                config['_log'].info(
                    'detected duplicate connection to w{}, closing it'.format(
                        worker))
                sock.close()
                return
            # Spawn service greenlets only once per worker; they survive reconnects.
            if worker not in state['pingers']:
                state['pingers'][worker] = spawn(_pinger, worker)
            if worker not in state['receivers']:
                state['receivers'][worker] = spawn(_receiver, worker)
            if worker not in state['requests']:
                state['requests'][worker] = Queue()
            if worker not in state['senders']:
                state['senders'][worker] = spawn(_sender, worker)
            if worker in state['consumers']:
                # Reconnect: re-register existing consumers with new ids.
                _reconsume(worker)
            else:
                state['consumers'][worker] = {}
            break
        except Exception:
            crit(also=worker)
            time.sleep(config['reconnect_seconds'])
def on_request(request):
    """
    Handler of request from client.

    @param request: dict(
        client: str,
        worker: int,
        body: str,
    )
    "action()" gets "request" without "body" but with (
        id: str,
        action: str,
        data: str - should be parsed inside "action()",
        confirm: bool,
    )
    Command in another worker gets "request" without (
        action: str - unused,
        data: str - unused and may be very big
    )
    """
    wall = gbn('on_request')
    request['id'] = None
    try:
        body = request['body'].rstrip()
        request['id'], request['action'], request['data'] = body.split(' ', 2)
        del request['body']  # Less data to pass between workers. On error "request" with "id, action, data" will be logged.
        if log.level == logging.DEBUG or config['grep']:
            verbose('w{}: {}#{} > {} {}'.format(state.worker, request['client'], request['id'], request['action'], request['data']))
        # Leading "--confirm " flag asks server to send a confirm response back.
        request['confirm'] = request['data'].startswith('--confirm ')
        if request['confirm']:
            request['data'] = request['data'][10:]  # len('--confirm ') == 10.
        action = state.actions[request['action']]
        wall = gbn(request['action'], wall=wall)
        action(request)
        gbn(wall=wall)
    except Exception:
        request['error_id'] = dtid(config['id_length'])
        crit(also=request)
        try:
            # Best effort: tell the client about the error id.
            respond(request)
        except Exception:
            crit(also=request)
        gbn(wall=wall)
def flow():
    """
    Smoke-test flow: consume from a throw-away queue, publish ten messages
    into it, then tear the consumer down.
    """
    try:
        queue = dtid()
        on_msg = lambda msg: None  # Messages are intentionally ignored.
        consumer_id = mqks.consume(
            queue=queue,
            events=[queue],
            on_msg=on_msg,
            delete_queue_when_unused=1,
            confirm=True,
        )
        for _ in xrange(10):
            mqks.publish(queue, dtid())
        time.sleep(1)
        mqks.delete_consumer(consumer_id)
    except Exception:
        crit()
def _pinger(worker):
    """
    Pings MQKS worker for keep-alive.

    @param worker: int
    """
    while 1:
        try:
            interval = config['ping_seconds']
            time.sleep(interval or 10)
            if config['ping_seconds']:  # Re-check: pinging may be toggled while sleeping.
                ping(worker)
        except Exception:
            crit()
def on_worker_disconnected(worker):
    """
    Handler of worker disconnect.

    @param worker: int|None
    """
    # NOTE(review): these branches look suspicious - "bye w{}" is logged only
    # when "worker is None" (rendering as "bye wNone"), while a known worker
    # always takes the crit/reconnect path. Confirm against callers whether
    # None marks a graceful shutdown here or the condition is inverted.
    if worker is None:
        log.info('w{}: bye w{}'.format(state.worker, worker))
    else:
        state.socks_by_workers.pop(
            worker, None
        )  # "workers_connector" or "server_for_workers" will reconnect, "commands_sender" will wait for new socket.
        crit(also='w{}: lost w{}, reconnecting, error ='.format(
            state.worker, worker))
def on_client(sock, address):
    """
    Reads length-prefixed (key, value) pairs from a client socket and
    resolves matching pending results.
    """
    def read_frame():
        # A frame is a SIZE_FORMAT-packed length followed by that many payload bytes.
        size, = struct.unpack(SIZE_FORMAT, recvall(sock, SIZE_SIZE))
        return recvall(sock, size)

    try:
        while 1:
            key = read_frame()
            value = read_frame()
            result = results.get(key)
            if result:
                result.set(value)
    except Exception as e:
        if not is_disconnect(e):
            crit(also=dict(address=address))
def responder(client):
    """
    Sends queued responses to socket of client.
    See also "mqks.server.lib.workers.respond()" that enqueues response to "state.responses_by_clients[client]".

    @param client: str
    """
    response = None
    try:
        responses = state.responses_by_clients.get(client)
        sock = state.socks_by_clients.get(client)
        if not responses or not sock:
            return  # Client already gone - nothing to drain.
        while client in state.responses_by_clients:
            try:
                # Timeout lets the loop periodically re-check client presence.
                response = responses.get(timeout=config['block_seconds'])
            except Empty:
                continue
            wall = gbn('responder')
            try:
                request, data = response
                error_id = request.get('error_id')
                # Wire format: "<request_id> ok <data>" or "<request_id> error <error_id>".
                response = '{} {}'.format('error' if error_id else 'ok',
                                          error_id or data)
                if log.level == logging.DEBUG or config['grep']:
                    verbose('w{}: {}#{} < {}'.format(state.worker, client,
                                                     request['id'], response))
                response = '{} {}\n'.format(request['id'], response)
            except Exception:
                # Malformed response item: log it and keep serving the client.
                gbn(wall=wall)
                crit(also=dict(response=response))
                continue
            try:
                sock.sendall(response)  # Disconnect on socket error.
            finally:
                gbn(wall=wall)
            time.sleep(0)  # Yield to other greenlets.
    except Exception as e:
        if not is_disconnect(e):
            crit(also=dict(client=client, response=response))
def top_events_log_and_reset():
    """ Reports top events from time to time, if enabled. """
    while 1:
        try:
            time.sleep(config['top_events_seconds'])
            if not config['top_events']:
                continue
            wall = gbn('top_events')
            # Sort by "published" desc, keep the configured number of rows.
            rows = sorted(state.top_events.iteritems(),
                          key=lambda row: row[1], reverse=True)
            rows = rows[:config['top_events_limit']]
            state.top_events.clear()  # Before context switch on logging IO.
            report = ' '.join('{}={}'.format(*row) for row in rows)
            log.info('w{}: top events: {}'.format(state.worker, report))
            gbn(wall=wall)
        except Exception:
            crit()
def workers_connector():
    """
    Connects and reconnects to other workers.

    Only dials workers with a greater index, so exactly one side initiates
    each pair's connection. Uses a unix socket for same-host workers and TCP
    otherwise. Once all peers are connected, starts the server for clients.
    """
    while 1:
        try:
            for worker in xrange(
                    state.worker + 1, WORKERS
            ):  # Avoids racing when two workers connect each other at the same time.
                if worker in state.socks_by_workers:
                    continue
                host, port_for_workers, _ = config['workers'][worker].split(':')
                family = AF_UNIX if host == config['host'] else AF_INET
                if family == AF_UNIX:
                    addr = os.path.join(config['unix_sock_dir'],
                                        'w{}'.format(worker))
                else:
                    # BUGFIX: "split(':')" yields a string, but TCP connect
                    # requires an int port (compare "connect()" which does int(port)).
                    addr = (host, int(port_for_workers))
                sock = socket.socket(family=family)
                try:
                    sock.connect(addr)
                except Exception as e:
                    log.debug('w{}: failed to connect to w{} at {}, error: {}'.
                              format(state.worker, worker, addr, e))
                    sock.close()
                    continue
                state.socks_by_workers[worker] = sock
                log.debug('w{}: connected to w{} at {}'.format(
                    state.worker, worker, addr))
                # Handshake: announce our own address line to the peer.
                sock.sendall(config['workers'][state.worker] + '\n')
                spawn(on_worker_connected, sock, worker=worker)
                if len(state.socks_by_workers
                       ) == WORKERS - 1 and not state.server_for_clients:
                    state.server_for_clients = StreamServer(
                        get_listener(config['port_for_clients']),
                        on_client_connected)
                    state.server_for_clients.start()
        except Exception:
            crit(also=dict(worker=state.worker))
        time.sleep(config['block_seconds'])
def on_client_connected(sock, addr):
    """
    Client connection handler.

    @param sock: gevent._socket2.socket
    @param addr: tuple(host: str, port: int)
    """
    client = request = None
    try:
        # To avoid conflicts when client reuses local port of other disconnected client - random part is added - is also good for log reading.
        client = '{1}:{2}.{0}'.format(uqid(config['client_postfix_length']),
                                      *addr)
        log.info('w{}: new client {}'.format(state.worker, client))
        state.socks_by_clients[client] = sock
        state.responses_by_clients[client] = Queue()
        spawn(responder, client)
        f = sock.makefile('r')
        while 1:
            request = dict(client=client, worker=state.worker)
            request['body'] = f.readline()
            if request['body'] == '':  # EOF: client disconnected.
                break
            assert '\t' not in request['body']  # See "spec.txt".
            on_request(request)
            time.sleep(0)  # Yield to other greenlets between requests.
    except Exception as e:
        if not is_disconnect(e):
            crit(also=dict(client=client, request=request))
    finally:
        log.info('w{}: bye client {}'.format(state.worker, client))
        try:
            # Tear down everything registered for this client above.
            delete_consumers(client)
            state.responses_by_clients.pop(client)
            state.socks_by_clients.pop(client)
        except Exception:
            crit(also=dict(client=client))
def on_error(command, request=None):
    """
    Crit and try responding the error to the client.

    @param command: None|str|tuple - defined in "execute"
    @param request: None|dict - defined in "on_request"
    """
    # Start with the raw command so the fallback "crit" below still has
    # something useful even if building the error dict itself fails.
    error = command
    try:
        error_id = dtid(config['id_length'])
        error = dict(
            command=command,
            error_id=error_id,
            worker=state.worker,
        )
        crit(also=error)
        if not request:
            return
        request['error_id'] = error_id
        remote_respond(request)
    except Exception:
        crit(also=error)
def _wait_used_or_delete_queue(client, queue, seconds):
    """
    Wait some seconds for queue to be used by consumers, else delete queue.

    @param client: str
    @param queue: str
    @param seconds: float
    """
    try:
        queue_used = state.queues_used.get(queue)
        still_unused = queue_used is None or not queue_used.wait(seconds)
        if still_unused:
            request = dict(
                id='delete_queue_when_unused',
                client=client,
                worker=state.worker,
                confirm=False,
            )
            _delete_queue(request, queue)
    except Exception:
        crit()
def _on_disconnect(worker, e, old_sock):
    """
    Handles unexpected disconnect.

    @param worker: int
    @param e: Exception
    @param old_sock: gevent._socket2.socket|None
    """
    if config['_log'].level == logging.DEBUG:
        config['_log'].debug('disconnected from w{}: {}'.format(worker, e))
    # ".keys()" snapshots consumer ids: callbacks may mutate the dict.
    for consumer_id in state['consumers'][worker].keys():
        on_disconnect = state['on_disconnect'].get(consumer_id)
        if on_disconnect:
            try:
                on_disconnect()
            except Exception:
                crit(also=dict(worker=worker, consumer_id=consumer_id))
    time.sleep(config['reconnect_seconds'])
    if state['auto_reconnect']:
        # "old_sock" lets "connect" CAS-detect a concurrent reconnect.
        connect(worker, old_sock=old_sock)
def _reconsume(worker):
    """
    Request consume again with updated consumer_ids. Is called on reconnect.

    @param worker: int
    """
    consumers = state['consumers'][worker]
    # ".items()" snapshots the dict: it is re-keyed inside the loop.
    for old_consumer_id, consumer in consumers.items():
        new_consumer_id = _request_id()
        # Re-key every per-consumer registry from the old id to the new one.
        consumers.pop(old_consumer_id, None)
        consumers[new_consumer_id] = consumer
        state['workers'].pop(old_consumer_id, None)
        state['workers'][new_consumer_id] = worker
        on_reconnect = state['on_reconnect'].pop(old_consumer_id, None)
        if on_reconnect:
            state['on_reconnect'][new_consumer_id] = on_reconnect
            try:
                on_reconnect(old_consumer_id, new_consumer_id)
            except Exception:
                crit(also=dict(old_consumer_id=old_consumer_id,
                               new_consumer_id=new_consumer_id))
        on_disconnect = state['on_disconnect'].pop(old_consumer_id, None)
        if on_disconnect:
            state['on_disconnect'][new_consumer_id] = on_disconnect
        on_msg = state['on_msg'].pop(old_consumer_id, None)
        if on_msg:
            state['on_msg'][new_consumer_id] = on_msg
        # No need to update old "consumer_id" partial-bound into "msg.ack()" and "msg.reject()":
        # when client disconnects from server, server deletes old consumer and rejects all msgs.
        _send(worker, new_consumer_id, 'consume', consumer)
def _sender(worker):
    """
    Send requests from queue to sock to avoid "This socket is already used by another greenlet" error.

    @param worker: int
    """
    try:
        # "requests" for worker CAN NOT be changed without kill of "_sender", so we create local name.
        # "sock" of worker CAN be changed without kill of "_sender" - on reconnect, preserving "requests".
        requests = state['requests'][worker]
    except Exception:
        crit()
    sock = None
    while 1:
        try:
            try:
                # Peek keeps the request queued until it is actually sent.
                request = requests.peek(timeout=1)
            except Empty:
                continue
            sock = state['socks'][worker]
            try:
                sock.sendall(request)
            except Exception as send_error:
                _on_disconnect(worker, send_error, sock)
                continue
            requests.get()  # Delete request from queue.
        except Exception:
            crit()
            time.sleep(config['reconnect_seconds'])  # Less spam.
def _receiver(worker):
    """
    Receive responses.

    @param worker: int
    """
    sock = state['socks'].get(worker)
    while 1:
        try:
            sock = state['socks'][worker]
            f = sock.makefile('r')
            while 1:
                response = f.readline()
                if response == '':  # E.g. socket is broken.
                    break
                try:
                    response = response.rstrip(
                        '\r\n')  # Not trailing space in "ok " confirm.
                    # Wire format: "<request_id> <ok|error> <data>".
                    request_id, response_type, data = response.split(' ', 2)
                    if config['_log'].level == logging.DEBUG:
                        config['_log'].debug('#{} < w{}: {} {}'.format(
                            request_id, worker, response_type, data))
                    ### error
                    if response_type == 'error':
                        error = Exception(response)
                        eval_result = state['eval_results'].pop(
                            request_id, None)
                        if eval_result:
                            # Deliver the error to the waiting eval caller.
                            eval_result.set_exception(error)
                            continue
                        # Nobody waits for it - log via the outer handler below.
                        raise error
                    ### confirm
                    if data == '':
                        confirm_event = state['confirms'].get(request_id)
                        if confirm_event is not None:
                            confirm_event.set()
                        continue
                    ### consume
                    on_msg = state['on_msg'].get(request_id)
                    if on_msg:
                        consumer_id = request_id
                        ### update consumer
                        if data.startswith('--update '):
                            _, consumer = data.split(' ', 1)
                            if consumer_id in state['workers']:
                                state['consumers'][worker][
                                    consumer_id] = consumer
                            continue
                        ### msg
                        msg_id, props, data = data.split(' ', 2)
                        msg = dict(
                            id=msg_id,
                            data=data,
                            ack=partial(ack, consumer_id, msg_id),
                            reject=partial(reject, consumer_id, msg_id),
                        )
                        # "props" is a comma-separated list of name=value pairs.
                        for prop in props.split(','):
                            name, value = prop.split('=', 1)
                            msg[name] = value
                        # Spawn so a slow callback does not block receiving.
                        spawn(_safe_on_msg, on_msg, msg)
                        continue
                    ### eval_result
                    eval_result = state['eval_results'].pop(request_id, None)
                    if eval_result:
                        eval_result.set(data)
                ### except
                except Exception:
                    crit(also=dict(worker=worker, response=response))
        except Exception as e:
            _on_disconnect(worker, e, sock)
        else:
            # Clean EOF (readline returned '') - still treat as disconnect.
            _on_disconnect(worker, None, sock)
def _safe_on_msg(on_msg, msg):
    """
    Calls "on_msg(msg)" shielding the spawning greenlet from callback errors:
    any exception is crit-logged instead of propagating.
    """
    try:
        on_msg(msg)
    except Exception:
        crit()
def receiver():
    """
    Runs the results server forever; crit-logs if it ever stops on an error.
    """
    try:
        state['server'].serve_forever()
    except Exception:
        crit()