def sending_loop_clients(websocket):
    """Forward queued smartHome payloads to one connected client.

    Registers a thread-safe ``changed`` callback in ``consumers_clients``;
    anything pushed through that callback is relayed to *websocket* until
    the connection dies, at which point the callback is unregistered.
    """
    # create sending-queue
    event_loop = asyncio.get_event_loop()
    outbox = Queue()
    logger.info('websockets .... smartHome Queue startet')

    def changed(tmp):
        # May be invoked from other threads, so hop onto the event loop.
        event_loop.call_soon_threadsafe(outbox.put_nowait, tmp)

    try:
        consumers_clients.append(changed)
        logger.info(
            'websockets .... ein neuer smartHome-Client wurde in die Queue aufgenommen: %s ',
            changed)
        while True:
            payload = yield from outbox.get()
            yield from websocket.send(payload)
            logger.debug(
                'websockets .... Sende json Daten -> smartHome-Client : %s',
                payload)
    finally:
        consumers_clients.remove(changed)
        logger.info(
            'websockets .... ein smartHome-Client wurde aus der Queue entfernt: %s ',
            changed)
class IRCClientProtocol(asyncio.Protocol):
    """Low-level protocol that speaks the client end of IRC.

    This isn't responsible for very much besides the barest minimum
    definition of an IRC client: connecting and responding to PING.

    You probably want `read_message`, or the higher-level client class.
    """

    def __init__(self, loop, nick, password, charset="utf8"):
        # Nick to register with (sent as NICK during registration).
        self.nick = nick
        self.password = password
        # Encoding used for both incoming and outgoing messages.
        self.charset = charset
        # Bytes received that are not yet terminated by CRLF.
        self.buf = b""
        # Parsed IRCMessage objects, consumed via read_message().
        self.message_queue = Queue(loop=loop)
        # Set to True once RPL_WELCOME (001) has been seen.
        self.registered = False

    def connection_made(self, transport):
        self.transport = transport

        # Standard IRC registration sequence: optional PASS, then NICK/USER.
        if self.password:
            self.send_message("PASS", self.password)
        self.send_message("NICK", self.nick)
        self.send_message("USER", "dywypi", "-", "-", "dywypi Python IRC bot")

    def data_received(self, data):
        # Prepend whatever was left over from the previous chunk, then peel
        # off complete CRLF-terminated messages one at a time.
        data = self.buf + data
        while True:
            raw_message, delim, data = data.partition(b"\r\n")
            if not delim:
                # Incomplete message; stop here and wait for more
                self.buf = raw_message
                return

            # TODO valerr
            message = IRCMessage.parse(raw_message.decode(self.charset))
            logger.debug("recv: %r", message)
            self.handle_message(message)

    def handle_message(self, message):
        # Only the bare minimum is handled here; everything is also queued
        # for the higher-level client to interpret.
        if message.command == "PING":
            self.send_message("PONG", message.args[-1])
        elif message.command == "RPL_WELCOME":
            # 001, first thing sent after registration
            if not self.registered:
                self.registered = True

        self.message_queue.put_nowait(message)

    def send_message(self, command, *args):
        message = IRCMessage(command, *args)
        logger.debug("sent: %r", message)
        self.transport.write(message.render().encode(self.charset) + b"\r\n")

    @asyncio.coroutine
    def read_message(self):
        # Blocks (cooperatively) until the next parsed message is available.
        return (yield from self.message_queue.get())
async def _handler(self, websocket, path):
    """Serve a single WebSocket connection.

    Bridges the connection to the callback API returned by
    ``self._accept_connection``: incoming frames are passed to
    ``on_message``, and messages handed to ``send_message`` (from any
    thread) are queued and written back to the socket.  A queued ``None``
    is the sentinel that closes the connection.
    """
    print('[WebSocketThread] Incoming connection')
    queue = Queue()

    async def send_message_async(message):
        await queue.put(message)

    def send_message(message):
        # May be called from other threads; schedule onto our loop.
        asyncio.run_coroutine_threadsafe(send_message_async(message), self._loop)

    def close():
        # None acts as the close sentinel for the loop below.
        send_message(None)

    on_open, on_message, on_close = self._accept_connection(send_message, close)
    on_open()

    listener_task = asyncio.ensure_future(websocket.recv())
    producer_task = asyncio.ensure_future(queue.get())
    try:
        while True:
            # Race the two tasks: wake up when either a frame arrives or
            # something was queued for sending.
            done, pending = await asyncio.wait(
                [listener_task, producer_task],
                return_when=asyncio.FIRST_COMPLETED)
            if listener_task in done:
                message = listener_task.result()
                on_message(message)
                listener_task = asyncio.ensure_future(websocket.recv())
            if producer_task in done:
                message = producer_task.result()
                if message is None:
                    break
                producer_task = asyncio.ensure_future(queue.get())
                await websocket.send(message)
    finally:
        listener_task.cancel()
        producer_task.cancel()
        on_close()
        print('[WebSocketThread] Connection closed')
def sending_loop_gui(websocket):
    """Relay queued GUI payloads to a single connected GUI client."""
    # create sending-queue
    gui_loop = asyncio.get_event_loop()
    gui_outbox = Queue()
    logger.info('websockets .... GUI Queue startet')

    def changed(tmp):
        # Thread-safe hand-off into this connection's queue.
        gui_loop.call_soon_threadsafe(gui_outbox.put_nowait, tmp)

    try:
        consumers_gui.append(changed)
        logger.info('websockets .... ein GUI-Client wurde in die Queue aufgenommen')
        while True:
            item = yield from gui_outbox.get()
            yield from websocket.send(item)
            logger.debug('websockets .... Sende json Daten -> GUI : %s', item)
    finally:
        consumers_gui.remove(changed)
        logger.info('websockets .... ein GUI-Client wurde aus der Queue entfernt')
def sending_loop(websocket):
    """Generic per-connection send loop: registers a consumer callback and
    forwards everything it receives to *websocket*."""
    # create sending-queue
    this_loop = asyncio.get_event_loop()
    pending = Queue()
    logger.info("websockets .... Queue startet")

    def changed(tmp):
        # Safe to call from any thread.
        this_loop.call_soon_threadsafe(pending.put_nowait, tmp)

    try:
        consumers.append(changed)
        logger.info("websockets .... consumers.append")
        while True:
            outgoing = yield from pending.get()
            yield from websocket.send(outgoing)
            logger.debug("websockets .... yield from websocket.send : %s", outgoing)
    finally:
        consumers.remove(changed)
        logger.info("websockets .... consumers.remove")
class ShellClient:
    """Local interactive "connection" that mirrors the network clients,
    wiring stdin/stdout through an urwid terminal protocol."""

    def __init__(self, loop, network):
        self.loop = loop
        # TODO it would be nice to parametrize these (or even accept arbitrary
        # transports), but the event loop doesn't support async reading from
        # ttys for some reason...
        self.stdin = sys.stdin
        self.stdout = sys.stdout
        self.event_queue = Queue(loop=loop)

    @asyncio.coroutine
    def connect(self):
        """Attach the urwid protocol to the local terminal."""
        self.protocol = UrwidTerminalProtocol(DywypiShell, self.loop)
        self.transport = TrivialFileTransport(
            self.loop, self.stdin, self.stdout, self.protocol)

    @asyncio.coroutine
    def disconnect(self):
        """Stop the urwid bridge."""
        self.protocol.bridge.stop()

    @asyncio.coroutine
    def read_event(self):
        # For now, this will never ever do anything.
        # TODO this sure looks a lot like IRCClient
        return (yield from self.event_queue.get())

    def format_transition(self, current_style, new_style):
        """Return the ANSI escape sequence that moves the terminal from
        *current_style* to *new_style*."""
        if new_style == Style.default():
            # Just use the reset sequence
            return "\x1b[0m"

        codes = ""
        if new_style.fg != current_style.fg:
            codes += FOREGROUND_CODES[new_style.fg]
        if new_style.bold != current_style.bold:
            codes += BOLD_CODES[new_style.bold]
        return codes
def sending_loop(websocket):
    """Register a consumer callback and stream its queued payloads out over
    *websocket* until the connection ends."""
    # create sending-queue
    loop_ref = asyncio.get_event_loop()
    queue = Queue()
    logger.info('websockets .... Queue startet')

    def changed(tmp):
        # Bounce onto the loop thread before touching the queue.
        loop_ref.call_soon_threadsafe(queue.put_nowait, tmp)

    try:
        consumers.append(changed)
        logger.info('websockets .... consumers.append')
        while True:
            data = yield from queue.get()
            yield from websocket.send(data)
            logger.debug('websockets .... yield from websocket.send : %s', data)
    finally:
        consumers.remove(changed)
        logger.info('websockets .... consumers.remove')
def sending_loop_gui(websocket):
    """Send loop for one GUI client: registers a callback and forwards every
    queued payload to the socket, deregistering on disconnect."""
    # create sending-queue
    ev_loop = asyncio.get_event_loop()
    messages = Queue()
    logger.info('websockets .... GUI Queue startet')

    def changed(tmp):
        # Invoked from other threads; enqueue via the loop.
        ev_loop.call_soon_threadsafe(messages.put_nowait, tmp)

    try:
        consumers_gui.append(changed)
        logger.info(
            'websockets .... ein GUI-Client wurde in die Queue aufgenommen')
        while True:
            payload = yield from messages.get()
            yield from websocket.send(payload)
            logger.debug('websockets .... Sende json Daten -> GUI : %s', payload)
    finally:
        consumers_gui.remove(changed)
        logger.info(
            'websockets .... ein GUI-Client wurde aus der Queue entfernt')
class IRCClient:
    """Higher-level IRC client.

    Takes care of most of the hard parts of IRC: incoming server messages
    are bundled into more intelligible events (see ``dywypi.event``), and
    commands that expect replies are implemented as coroutines.
    """

    def __init__(self, loop, network):
        self.loop = loop
        self.network = network
        self.joined_channels = {}  # name => Channel

        # IRC server features, as reported by ISUPPORT, with defaults taken
        # from the RFC.
        self.len_nick = 9
        self.len_channel = 200
        self.len_message = 510
        # These lengths don't have limits mentioned in the RFC, so going with
        # the smallest known values in the wild
        self.len_kick = 80
        self.len_topic = 80
        self.len_away = 160
        self.max_watches = 0
        self.max_targets = 1
        self.channel_types = set('#&')
        self.channel_modes = {}  # TODO, haha.
        self.channel_prefixes = {}  # TODO here too.  IRCMode is awkward.
        self.network_title = self.network.name
        self.features = {}

        # Various intermediate state used for waiting for replies and
        # aggregating multi-part replies
        # TODO hmmm so what happens if state just gets left here forever?  do
        # we care?
        self._pending_names = {}
        self._names_futures = {}
        self._pending_topics = {}
        self._join_futures = {}

        self.event_queue = Queue(loop=loop)

    def get_channel(self, channel_name):
        """Returns a `Channel` object containing everything the client
        definitively knows about the given channel.

        Note that if you, say, ask for the topic of a channel you aren't in
        and then immediately call `get_channel`, the returned object won't
        have its topic populated.  State is only tracked persistently for
        channels the bot is in; otherwise there's no way to know whether or
        not it's stale.
        """
        if channel_name in self.joined_channels:
            return self.joined_channels[channel_name]
        else:
            return IRCChannel(self, channel_name)

    @asyncio.coroutine
    def connect(self):
        """Coroutine for connecting to a single server.

        Note that this will nonblock until the client is "registered",
        defined as the first PING/PONG exchange.
        """
        # TODO this is a poor excuse for round-robin :)
        server = self.current_server = self.network.servers[0]

        # TODO i'm pretty sure the server tells us what our nick is, and we
        # should believe that instead
        self.nick = self.network.preferred_nick

        # TODO: handle disconnection, somehow.  probably affects a lot of
        # things.
        # TODO kind of wish this weren't here, since the creation of the
        # connection isn't inherently part of a client.  really it should be
        # on the... network, perhaps?  and there's no reason i shouldn't be
        # able to "connect" to a unix socket or pipe or anywhere else that
        # has data.
        _, self.proto = yield from self.loop.create_connection(
            lambda: IRCClientProtocol(
                self.loop, self.network.preferred_nick,
                password=server.password),
            server.host, server.port, ssl=server.tls)

        while True:
            yield from self._read_message()
            # TODO this is dumb garbage; more likely this client itself
            # should just wait for 001/RPL_WELCOME.
            if self.proto.registered:
                break

        # Start the event loop as soon as we've synched, or we can't respond
        # to anything.
        # FIX: asyncio.async() cannot even be parsed on Python 3.7+ (`async`
        # became a keyword); ensure_future is the equivalent, compatible call.
        asyncio.ensure_future(self._advance(), loop=self.loop)

        # Initial joins
        yield from asyncio.gather(*[
            self.join(channel_name)
            for channel_name in self.network.autojoins
        ], loop=self.loop)

    @asyncio.coroutine
    def disconnect(self):
        self.proto.send_message('QUIT', 'Seeya!')
        self.proto.transport.close()

    @asyncio.coroutine
    def _advance(self):
        """Internal coroutine that just keeps the protocol message queue
        going.  Called once after a connect and should never be called again
        after that.
        """
        # TODO this is currently just to keep the message queue going, but
        # eventually it should turn them into events and stuff them in an
        # event queue
        yield from self._read_message()

        # FIX: was asyncio.async(); see connect() above.
        asyncio.ensure_future(self._advance(), loop=self.loop)

    @asyncio.coroutine
    def _read_message(self):
        """Internal dispatcher for messages received from the protocol."""
        message = yield from self.proto.read_message()

        # TODO there is a general ongoing problem here with matching up
        # responses.  ESPECIALLY when error codes are possible.  something
        # here is gonna have to get a bit fancier.  maybe it should live at
        # the protocol level, actually...?

        # Boy do I ever hate this pattern but it's slightly more
        # maintainable than a 500-line if tree.
        handler = getattr(self, '_handle_' + message.command, None)
        if handler:
            handler(message)

    def _handle_RPL_ISUPPORT(self, message):
        me, *features, human_text = message.args
        for feature_string in features:
            # BUG FIX: str.partition() returns '' (never None) for a missing
            # separator, so the old `if value is None` test was dead code and
            # valueless features were stored as ''.  Test the separator
            # instead so flag-style features are stored as True, as intended.
            feature, sep, value = feature_string.partition('=')
            if not sep:
                value = True
            self.features[feature] = value

            if feature == 'NICKLEN':
                self.len_nick = int(value)
            elif feature == 'CHANNELLEN':
                self.len_channel = int(value)
            elif feature == 'KICKLEN':
                self.len_kick = int(value)
            elif feature == 'TOPICLEN':
                self.len_topic = int(value)
            elif feature == 'AWAYLEN':
                self.len_away = int(value)
            elif feature == 'WATCH':
                self.max_watches = int(value)
            elif feature == 'CHANTYPES':
                self.channel_types = set(value)
            elif feature == 'PREFIX':
                # List of channel user modes, in relative priority order, in
                # the format (ov)@+
                assert value[0] == '('
                letters, symbols = value[1:].split(')')
                assert len(letters) == len(symbols)
                self.channel_prefixes.clear()
                for letter, symbol in zip(letters, symbols):
                    mode = IRCMode(letter, prefix=symbol)
                    self.channel_modes[letter] = mode
                    self.channel_prefixes[symbol] = mode
            elif feature == 'MAXTARGETS':
                self.max_targets = int(value)
            elif feature == 'CHANMODES':
                # Four groups delimited by lists: list-style (+b), arg
                # required (+k), arg required only to set (+l), argless
                lists, args, argsets, argless = value.split(',')
                for letter in lists:
                    self.channel_modes[letter] = IRCMode(
                        letter, multi=True)
                for letter in args:
                    self.channel_modes[letter] = IRCMode(
                        letter, arg_on_set=True, arg_on_remove=True)
                for letter in argsets:
                    self.channel_modes[letter] = IRCMode(
                        letter, arg_on_set=True)
                for letter in argless:
                    self.channel_modes[letter] = IRCMode(letter)
            elif feature == 'NETWORK':
                self.network_title = value

    def _handle_JOIN(self, message):
        channel_name, = message.args
        joiner = Peer.from_prefix(message.prefix)
        # TODO should there be a self.me?  how...
        if joiner.name == self.nick:
            # We just joined a channel
            #assert channel_name not in self.joined_channels
            # TODO key?  do we care?
            # TODO what about channel configuration and anon non-joined
            # channels?  how do these all relate...
            channel = IRCChannel(self, channel_name)
            self.joined_channels[channel.name] = channel
        else:
            # Someone else just joined the channel
            self.joined_channels[channel_name].add_user(joiner)

    def _handle_RPL_TOPIC(self, message):
        # Topic.  Sent when joining or when requesting the topic.
        # TODO this doesn't handle the "requesting" part
        # TODO what if me != me?
        me, channel_name, topic_text = message.args
        self._pending_topics[channel_name] = IRCTopic(topic_text)

    def _handle_RPL_TOPICWHOTIME(self, message):
        # Topic author (NONSTANDARD).  Sent after RPL_TOPIC.
        # Unfortunately, there's no way to know whether to expect this.
        # TODO this doesn't handle the "requesting" part
        # TODO what if me != me?
        me, channel_name, author, timestamp = message.args
        topic = self._pending_topics.setdefault(channel_name, IRCTopic(''))
        topic.author = Peer.from_prefix(author)
        topic.timestamp = datetime.utcfromtimestamp(int(timestamp))

    def _handle_RPL_NAMREPLY(self, message):
        # Names response.  Sent when joining or when requesting a names
        # list.  Must be ended with a RPL_ENDOFNAMES.
        me, useless_equals_sign, channel_name, *raw_names = message.args

        # List of names is actually optional (?!)
        if raw_names:
            raw_names = raw_names[0]
        else:
            raw_names = ''

        names = raw_names.strip(' ').split(' ')
        namelist = self._pending_names.setdefault(channel_name, [])
        # TODO modes?  should those be stripped off here?
        # TODO for that matter should these become peers here?
        namelist.extend(names)

    def _handle_RPL_ENDOFNAMES(self, message):
        # End of names list.  Sent at the very end of a join or the very
        # end of a NAMES request.
        me, channel_name, info = message.args
        namelist = self._pending_names.pop(channel_name, [])

        if channel_name in self._names_futures:
            # TODO we should probably not ever have a names future AND a
            # pending join at the same time.  or, does it matter?
            self._names_futures[channel_name].set_result(namelist)
            del self._names_futures[channel_name]

        if channel_name in self.joined_channels:
            # Join synchronized!
            channel = self.joined_channels[channel_name]
            channel.sync = True

            channel.topic = self._pending_topics.pop(channel_name, None)

            for name in namelist:
                modes = set()
                # TODO use features!
                while name and name[0] in '+%@&~':
                    modes.add(name[0])
                    name = name[1:]

                # TODO haha no this is so bad.
                # TODO the bot should, obviously, keep a record of all
                # known users as well.  alas, mutable everything.
                peer = Peer(name, None, None)

                channel.add_user(peer, modes)

            if channel_name in self._join_futures:
                # Update the Future
                self._join_futures[channel_name].set_result(channel)
                del self._join_futures[channel_name]

    def _handle_PRIVMSG(self, message):
        event = Message(self, message)
        self.event_queue.put_nowait(event)

    @asyncio.coroutine
    def read_event(self):
        """Produce a single IRC event.

        This client does not do any kind of multiplexing or event handler
        notification; that's left to a higher level.
        """
        return (yield from self.event_queue.get())

    # Implementations of particular commands

    # TODO should this be part of the general client interface, or should
    # there be a separate thing that smooths out the details?
    @asyncio.coroutine
    def say(self, target, message):
        """Coroutine that sends a message to a target, which may be either a
        `Channel` or a `Peer`.
        """
        yield from self.send_message('PRIVMSG', target, message)

    def join(self, channel_name, key=None):
        """Coroutine that joins a channel, and nonblocks until the join is
        "synchronized" (defined as receiving the nick list).
        """
        if channel_name in self._join_futures:
            return self._join_futures[channel_name]

        # TODO multiple?  error on commas?
        if key is None:
            self.proto.send_message('JOIN', channel_name)
        else:
            self.proto.send_message('JOIN', channel_name, key)

        # Clear out any lingering names list
        self._pending_names[channel_name] = []

        # Return a Future, to be populated by the message loop
        fut = self._join_futures[channel_name] = asyncio.Future()
        return fut

    def names(self, channel_name):
        """Coroutine that returns a list of names in a channel."""
        self.proto.send_message('NAMES', channel_name)

        # No need to do the same thing twice
        if channel_name in self._names_futures:
            return self._names_futures[channel_name]

        # Clear out any lingering names list
        self._pending_names[channel_name] = []

        # Return a Future, to be populated by the message loop
        fut = self._names_futures[channel_name] = asyncio.Future()
        return fut

    def set_topic(self, channel, topic):
        """Sets the channel topic."""
        self.proto.send_message('TOPIC', channel, topic)

    @asyncio.coroutine
    def send_message(self, command, *args):
        self.proto.send_message(command, *args)

    def format_transition(self, current_style, new_style):
        """Return the mIRC-style control codes that move the client's text
        style from *current_style* to *new_style*."""
        if new_style == Style.default():
            # Reset code, ^O
            return '\x0f'

        if new_style.fg != current_style.fg and new_style.fg is Color.default:
            # IRC has no "reset to default" code.  mIRC claims color 99 is
            # for this, but it lies, at least in irssi.  So we must reset
            # and reapply everything.
            ret = '\x0f'
            if new_style.bold is Bold.on:
                ret += '\x02'
            return ret

        ret = ''
        if new_style.fg != current_style.fg:
            ret += FOREGROUND_CODES[new_style.fg]

        if new_style.bold != current_style.bold:
            # There's no on/off for bold, just a toggle
            ret += '\x02'

        return ret
class DCCClient:
    """Client for a DCC (direct client-to-client) connection, used for chat
    and for sending files outside the IRC server."""

    def __init__(self, loop, network, send=False):
        self.loop = loop
        self.network = network
        self.read_queue = Queue(loop=loop)
        # send=True means we host a one-shot server and push data to the
        # peer; send=False means we connect out and read.
        self.send = send
        # ugh what if i want to RECEIVE though.
        # not sure what the use case would be but...?

    @asyncio.coroutine
    def connect(self, port=None):
        if not self.send:
            server = self.current_server = self.network.servers[0]
            self._reader, self._writer = yield from server.connect(self.loop)
            self._read_loop_task = asyncio.Task(self._start_read_loop())
            # FIX: asyncio.async() is a syntax error on Python 3.7+; ensure_future
            # is the compatible spelling (and is a no-op on an existing Task).
            asyncio.ensure_future(self._read_loop_task, loop=self.loop)
        else:
            # Lock held until a peer actually connects; transfer() blocks
            # on it.
            self._waiting = asyncio.Lock()
            yield from self._waiting.acquire()
            if port:
                self.network = yield from asyncio.start_server(
                    self._handle_client,
                    host=socket.gethostbyname(socket.gethostname()),
                    port=port, loop=self.loop)
            else:
                logger.error("No port provided for send")

    @asyncio.coroutine
    def _handle_client(self, client_reader, client_writer):
        # Server callback: adopt the incoming connection and unblock
        # transfer().
        self._reader = client_reader
        self._writer = client_writer
        self._waiting.release()
        self._read_loop_task = asyncio.Task(self._start_read_loop())
        # FIX: was asyncio.async(); see connect().
        asyncio.ensure_future(self._read_loop_task, loop=self.loop)

    @asyncio.coroutine
    def disconnect(self):
        yield from self._writer.drain()
        self._writer.write_eof()

        self._read_loop_task.cancel()
        # _start_read_loop() swallows the CancelledError, so this completes
        # normally.
        yield from self._read_loop_task

        # Drain whatever the peer still has in flight so the transport can
        # close cleanly.
        while not self._reader.at_eof():
            yield from self._reader.readline()

        if self.send:
            self.network.close()

    @asyncio.coroutine
    def _start_read_loop(self):
        # acks don't really do anything so don't listen for them
        if not self.send:
            while not self._reader.at_eof():
                try:
                    yield from self._read_message()
                except CancelledError:
                    return
                except Exception:
                    logger.exception("Smothering exception in DCC read loop")

    @asyncio.coroutine
    def _read_message(self):
        """Read one line, parse it, and enqueue the resulting event."""
        line = yield from self._reader.readline()
        m = re.match(b'(.*)(\r|\n|\r\n)$', line)
        assert m
        line = m.group(1)
        message = DCCMessage.parse(line)
        logger.debug("recv: %r", message)
        event = DirectMessage(self, message)
        self.read_queue.put_nowait((message, event))

    @asyncio.coroutine
    def read_event(self):
        """Wait for and return the next incoming event."""
        message, event = yield from self.read_queue.get()
        return event

    @asyncio.coroutine
    def say(self, message, target=None, no_respond=None):
        # target/no_respond exist for interface parity with other clients;
        # DCC is point-to-point so they are unused.
        self.send_message(message)

    @asyncio.coroutine
    def send_message(self, message):
        message = DCCMessage(message)
        logger.debug("sent: %r", message)
        self._writer.write(message.render().encode('utf8') + b'\r\n')

    @asyncio.coroutine
    def transfer(self, path):
        """Stream the file at *path* to the connected peer in 1 KiB chunks.

        Blocks until a peer has connected (the lock acquired in connect()).
        """
        yield from self._waiting.acquire()
        try:
            # FIX: use a context manager so the file is closed even if a
            # write fails (it previously leaked on error), and release the
            # lock in all cases.
            with open(str(path), 'rb') as f:
                while True:
                    block = f.read(1024)
                    if not block:
                        break
                    self._writer.write(block)
        finally:
            self._waiting.release()
        return True
class StreamConnection:
    """Line-oriented connection over an asyncio stream reader/writer pair.

    A background task pumps incoming lines into an internal queue; ``recv``
    hands them out, racing against connection shutdown.
    """

    def __init__(self, sr, sw, *, loop=None):
        if not loop:
            loop = asyncio.get_event_loop()
        self._loop = loop
        self._sr = sr
        self._sw = sw
        self._msgs = Queue(loop=loop)
        # Background reader task; cancelled in close().
        self._worker = loop.create_task(self._run())

    @asyncio.coroutine
    def _run(self):
        # Pump lines into the queue until EOF or a connection error.
        while self.alive():
            try:
                data = yield from self._sr.readline()
                if data and len(data):
                    self._msgs.put_nowait(self._convert(data))
            except asyncio.CancelledError:
                # Loop re-checks alive(); close() feeds EOF before
                # cancelling, so this terminates.
                logger.debug("readline from stream reader was cancelled.")
            except ConnectionError:
                logger.debug("connection error")
                break
        logger.debug("connection closed")

    def _convert(self, data):
        # Strip the line terminator (and surrounding whitespace).
        return data.strip()

    @asyncio.coroutine
    def recv(self):
        """Return the next message, or ``None`` once the connection closes."""
        try:
            return self._msgs.get_nowait()
        except QueueEmpty:
            pass

        # Wait for a message until the connection is closed
        next_message = self._loop.create_task(self._msgs.get())
        done, pending = yield from asyncio.wait(
            [next_message, self._worker],
            loop=self._loop,
            return_when=asyncio.FIRST_COMPLETED)
        if next_message in done:
            return next_message.result()
        else:
            next_message.cancel()

    def send(self, data):
        """Write *data* (bytes, without trailing newline) to the peer.

        Raises ConnectionError if the connection is closed or the write
        fails at the OS level.
        """
        if not self.alive():
            raise ConnectionError("connection was closed.")
        try:
            data = data + b'\n'
            self._sw.write(data)
        except OSError:
            raise ConnectionError("can't send data.")
        except Exception:
            logger.debug("Q___Q")

    def alive(self):
        """True while the reader has not reached EOF."""
        return not self._sr.at_eof()

    @asyncio.coroutine
    def drain(self):
        # BUG FIX: this method was declared as ``def drain():`` with no
        # ``self`` parameter, so any call raised immediately (``self`` was
        # unbound inside the body).  It must be an instance method.
        yield from self._sw.drain()

    @asyncio.coroutine
    def close(self):
        """Flush and close the connection and stop the reader task."""
        if self.alive():
            try:
                yield from self._sw.drain()
                self._sw.write_eof()
            except ConnectionError:
                pass
            else:
                # Let the worker loop observe EOF and exit.
                self._sr.feed_eof()
        self._sw.close()
        self._worker.cancel()
class WebSocketCommonProtocol(asyncio.StreamReaderProtocol):
    """
    This class implements common parts of the WebSocket protocol.

    It assumes that the WebSocket connection is established. The handshake
    is managed in subclasses such as
    :class:`~websockets.server.WebSocketServerProtocol` and
    :class:`~websockets.client.WebSocketClientProtocol`.

    It runs a task that stores incoming data frames in a queue and deals
    with control frames automatically. It sends outgoing data frames and
    performs the closing handshake.

    The `host`, `port` and `secure` parameters are simply stored as
    attributes for handlers that need them.

    The `timeout` parameter defines the maximum wait time in seconds for
    completing the closing handshake and, only on the client side, for
    terminating the TCP connection. :meth:`close()` will complete in at most
    this time on the server side and twice this time on the client side.

    The `max_size` parameter enforces the maximum size for incoming messages
    in bytes. The default value is 1MB. ``None`` disables the limit. If a
    message larger than the maximum size is received, :meth:`recv()` will
    return ``None`` and the connection will be closed with status code 1009.

    Once the connection is closed, the status code is available in the
    :attr:`close_code` attribute and the reason in :attr:`close_reason`.
    """
    # There are only two differences between the client-side and the server-
    # side behavior: masking the payload and closing the underlying TCP
    # connection. This class implements the server-side behavior by default.
    # To get the client-side behavior, set is_client = True.

    is_client = False
    state = 'OPEN'

    def __init__(self, *, host=None, port=None, secure=None,
                 timeout=10, max_size=2 ** 20, loop=None):
        self.host = host
        self.port = port
        self.secure = secure
        self.timeout = timeout
        self.max_size = max_size
        super().__init__(asyncio.StreamReader(), self.client_connected, loop)
        self.close_code = None
        self.close_reason = ''
        # Futures tracking steps in the connection's lifecycle.
        self.opening_handshake = asyncio.Future()
        self.closing_handshake = asyncio.Future()
        self.connection_failed = asyncio.Future()
        self.connection_closed = asyncio.Future()
        # Queue of received messages.
        self.messages = Queue()
        # Mapping of ping IDs to waiters, in chronological order.
        self.pings = collections.OrderedDict()
        # Task managing the connection.
        # FIX: asyncio.async() is a syntax error on Python 3.7+ (``async``
        # became a keyword); ensure_future is the compatible equivalent.
        self.worker = asyncio.ensure_future(self.run())
        # In a subclass implementing the opening handshake, the state will be
        # CONNECTING at this point.
        if self.state == 'OPEN':
            self.opening_handshake.set_result(True)

    # Public API

    @property
    def open(self):
        """
        This property is ``True`` when the connection is usable.

        It may be used to handle disconnections gracefully.
        """
        return self.state == 'OPEN'

    @asyncio.coroutine
    def close(self, code=1000, reason=''):
        """
        This coroutine performs the closing handshake.

        This is the expected way to terminate a connection on the server
        side.

        It waits for the other end to complete the handshake. It doesn't do
        anything once the connection is closed.

        It's usually safe to wrap this coroutine in `asyncio.async()` since
        errors during connection termination aren't particularly useful.

        The `code` must be an :class:`int` and the `reason` a :class:`str`.
        """
        if self.state == 'OPEN':
            # 7.1.2. Start the WebSocket Closing Handshake
            self.close_code, self.close_reason = code, reason
            yield from self.write_frame(OP_CLOSE,
                                        serialize_close(code, reason))
            # 7.1.3. The WebSocket Closing Handshake is Started
            self.state = 'CLOSING'

        # If the connection doesn't terminate within the timeout, break out
        # of the worker loop.
        try:
            yield from asyncio.wait_for(self.worker, timeout=self.timeout)
        except asyncio.TimeoutError:
            self.worker.cancel()

        # The worker should terminate quickly once it has been cancelled.
        yield from self.worker

    @asyncio.coroutine
    def recv(self):
        """
        This coroutine receives the next message.

        It returns a :class:`str` for a text frame and :class:`bytes` for a
        binary frame.

        When the end of the message stream is reached, or when a protocol
        error occurs, :meth:`recv` returns ``None``, indicating that the
        connection is closed.
        """
        # Return any available message
        try:
            return self.messages.get_nowait()
        except QueueEmpty:
            pass

        # Wait for a message until the connection is closed
        # FIX: was asyncio.async(); see __init__.
        next_message = asyncio.ensure_future(self.messages.get())
        done, pending = yield from asyncio.wait(
            [next_message, self.worker],
            return_when=asyncio.FIRST_COMPLETED)
        if next_message in done:
            return next_message.result()
        else:
            next_message.cancel()

    @asyncio.coroutine
    def send(self, data):
        """
        This coroutine sends a message.

        It sends a :class:`str` as a text frame and :class:`bytes` as a
        binary frame.

        It raises a :exc:`TypeError` for other inputs and
        :exc:`InvalidState` once the connection is closed.
        """
        if isinstance(data, str):
            opcode = 1
            data = data.encode('utf-8')
        elif isinstance(data, bytes):
            opcode = 2
        else:
            raise TypeError("data must be bytes or str")
        yield from self.write_frame(opcode, data)

    @asyncio.coroutine
    def ping(self, data=None):
        """
        This coroutine sends a ping.

        It returns a Future which will be completed when the corresponding
        pong is received and which you may ignore if you don't want to wait.

        A ping may serve as a keepalive.
        """
        # Protect against duplicates if a payload is explicitly set.
        if data in self.pings:
            raise ValueError("Already waiting for a pong with the same data")
        # Generate a unique random payload otherwise.
        while data is None or data in self.pings:
            data = struct.pack('!I', random.getrandbits(32))

        self.pings[data] = asyncio.Future()
        yield from self.write_frame(OP_PING, data)
        return self.pings[data]

    @asyncio.coroutine
    def pong(self, data=b''):
        """
        This coroutine sends a pong.

        An unsolicited pong may serve as a unidirectional heartbeat.
        """
        yield from self.write_frame(OP_PONG, data)

    # Private methods - no guarantees.

    @asyncio.coroutine
    def run(self):
        # This coroutine guarantees that the connection is closed at exit.
        yield from self.opening_handshake
        while not self.closing_handshake.done():
            try:
                msg = yield from self.read_message()
                if msg is None:
                    break
                self.messages.put_nowait(msg)
            except asyncio.CancelledError:
                break
            except WebSocketProtocolError:
                yield from self.fail_connection(1002)
            except asyncio.IncompleteReadError:
                yield from self.fail_connection(1006)
            except UnicodeDecodeError:
                yield from self.fail_connection(1007)
            except PayloadTooBig:
                yield from self.fail_connection(1009)
            except Exception:
                yield from self.fail_connection(1011)
                raise
        yield from self.close_connection()

    @asyncio.coroutine
    def read_message(self):
        # Reassemble fragmented messages.
        frame = yield from self.read_data_frame(max_size=self.max_size)
        if frame is None:
            return
        if frame.opcode == OP_TEXT:
            text = True
        elif frame.opcode == OP_BINARY:
            text = False
        else:   # frame.opcode == OP_CONT
            raise WebSocketProtocolError("Unexpected opcode")

        # Shortcut for the common case - no fragmentation
        if frame.fin:
            return frame.data.decode('utf-8') if text else frame.data

        # 5.4. Fragmentation
        chunks = []
        max_size = self.max_size
        if text:
            decoder = codecs.getincrementaldecoder('utf-8')(errors='strict')
            if max_size is None:
                def append(frame):
                    nonlocal chunks
                    chunks.append(decoder.decode(frame.data, frame.fin))
            else:
                def append(frame):
                    nonlocal chunks, max_size
                    chunks.append(decoder.decode(frame.data, frame.fin))
                    max_size -= len(frame.data)
        else:
            if max_size is None:
                def append(frame):
                    nonlocal chunks
                    chunks.append(frame.data)
            else:
                def append(frame):
                    nonlocal chunks, max_size
                    chunks.append(frame.data)
                    max_size -= len(frame.data)
        append(frame)

        while not frame.fin:
            frame = yield from self.read_data_frame(max_size=max_size)
            if frame is None:
                raise WebSocketProtocolError("Incomplete fragmented message")
            if frame.opcode != OP_CONT:
                raise WebSocketProtocolError("Unexpected opcode")
            append(frame)

        return ('' if text else b'').join(chunks)

    @asyncio.coroutine
    def read_data_frame(self, max_size):
        # Deal with control frames automatically and return next data frame.
        # 6.2. Receiving Data
        while True:
            frame = yield from self.read_frame(max_size)
            # 5.5. Control Frames
            if frame.opcode == OP_CLOSE:
                self.close_code, self.close_reason = parse_close(frame.data)
                if self.state != 'CLOSING':
                    # 7.1.3. The WebSocket Closing Handshake is Started
                    self.state = 'CLOSING'
                    yield from self.write_frame(OP_CLOSE, frame.data,
                                                'CLOSING')
                if not self.closing_handshake.done():
                    self.closing_handshake.set_result(True)
                return
            elif frame.opcode == OP_PING:
                # Answer pings.
                yield from self.pong(frame.data)
            elif frame.opcode == OP_PONG:
                # Do not acknowledge pings on unsolicited pongs.
                if frame.data in self.pings:
                    # Acknowledge all pings up to the one matching this pong.
                    ping_id = None
                    while ping_id != frame.data:
                        ping_id, waiter = self.pings.popitem(0)
                        if not waiter.cancelled():
                            waiter.set_result(None)
            # 5.6. Data Frames
            else:
                return frame

    @asyncio.coroutine
    def read_frame(self, max_size):
        is_masked = not self.is_client
        frame = yield from read_frame(self.reader.readexactly, is_masked,
                                      max_size=max_size)
        side = 'client' if self.is_client else 'server'
        logger.debug("%s << %s", side, frame)
        return frame

    @asyncio.coroutine
    def write_frame(self, opcode, data=b'', expected_state='OPEN'):
        # This may happen if a user attempts to write on a closed connection.
        if self.state != expected_state:
            raise InvalidState("Cannot write to a WebSocket "
                               "in the {} state".format(self.state))
        frame = Frame(True, opcode, data)
        side = 'client' if self.is_client else 'server'
        logger.debug("%s >> %s", side, frame)
        is_masked = self.is_client
        write_frame(frame, self.writer.write, is_masked)
        try:
            # Handle flow control automatically.
            yield from self.writer.drain()
        except ConnectionResetError:
            # Terminate the connection if the socket died,
            # unless it's already being closed.
            if expected_state != 'CLOSING':
                self.state = 'CLOSING'
                yield from self.fail_connection(1006)

    @asyncio.coroutine
    def close_connection(self):
        # 7.1.1. Close the WebSocket Connection
        if self.state == 'CLOSED':
            return

        # Defensive assertion for protocol compliance.
        if self.state != 'CLOSING':                         # pragma: no cover
            raise InvalidState("Cannot close a WebSocket connection "
                               "in the {} state".format(self.state))

        if self.is_client:
            try:
                yield from asyncio.wait_for(self.connection_closed,
                                            timeout=self.timeout)
            except (asyncio.CancelledError, asyncio.TimeoutError):
                pass

            if self.state == 'CLOSED':
                return

        # Attempt to terminate the TCP connection properly.
        # If the socket is already closed, this will crash.
        try:
            if self.writer.can_write_eof():
                self.writer.write_eof()
        except Exception:
            pass

        self.writer.close()

        try:
            yield from asyncio.wait_for(self.connection_closed,
                                        timeout=self.timeout)
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass

    @asyncio.coroutine
    def fail_connection(self, code=1011, reason=''):
        # Avoid calling fail_connection more than once to minimize
        # the consequences of race conditions between the two sides.
        if self.connection_failed.done():
            # Wait until the other coroutine calls connection_lost.
            yield from self.connection_closed
            return
        else:
            self.connection_failed.set_result(None)

        # Losing the connection usually results in a protocol error.
        # Preserve the original error code in this case.
        if self.close_code != 1006:
            self.close_code, self.close_reason = code, reason
        # 7.1.7. Fail the WebSocket Connection
        logger.info("Failing the WebSocket connection: %d %s", code, reason)
        if self.state == 'OPEN':
            yield from self.write_frame(OP_CLOSE,
                                        serialize_close(code, reason))
            self.state = 'CLOSING'
        if not self.closing_handshake.done():
            self.closing_handshake.set_result(False)
        yield from self.close_connection()

    # asyncio StreamReaderProtocol methods

    def client_connected(self, reader, writer):
        self.reader = reader
        self.writer = writer

    def connection_lost(self, exc):
        # 7.1.4. The WebSocket Connection is Closed
        self.state = 'CLOSED'
        if not self.connection_closed.done():
            self.connection_closed.set_result(None)
        if self.close_code is None:
            self.close_code = 1006
        super().connection_lost(exc)
class WebSocketCommonProtocol(asyncio.StreamReaderProtocol):
    """
    This class implements common parts of the WebSocket protocol.

    It assumes that the WebSocket connection is established. The handshake is
    managed in subclasses such as
    :class:`~websockets.server.WebSocketServerProtocol` and
    :class:`~websockets.client.WebSocketClientProtocol`.

    It runs a task that stores incoming data frames in a queue and deals with
    control frames automatically. It sends outgoing data frames and performs
    the closing handshake.

    The `host`, `port` and `secure` parameters are simply stored as attributes
    for handlers that need them.

    The `timeout` parameter defines the maximum wait time in seconds for
    completing the closing handshake and, only on the client side, for
    terminating the TCP connection. :meth:`close()` will complete in at most
    this time on the server side and twice this time on the client side.

    Once the connection is closed, the status code is available in the
    :attr:`close_code` attribute and the reason in :attr:`close_reason`.
    """

    # There are only two differences between the client-side and the server-
    # side behavior: masking the payload and closing the underlying TCP
    # connection. This class implements the server-side behavior by default.
    # To get the client-side behavior, set is_client = True.

    is_client = False
    state = 'OPEN'

    def __init__(self, *, host=None, port=None, secure=None, timeout=10,
                 loop=None):
        self.host = host
        self.port = port
        self.secure = secure
        self.timeout = timeout
        super().__init__(asyncio.StreamReader(), self.client_connected, loop)
        self.close_code = None
        self.close_reason = ''
        # Futures tracking steps in the connection's lifecycle.
        self.opening_handshake = asyncio.Future()
        self.closing_handshake = asyncio.Future()
        self.connection_closed = asyncio.Future()
        # Queue of received messages.
        self.messages = Queue()
        # Mapping of ping IDs to waiters, in chronological order.
        self.pings = collections.OrderedDict()
        # Task managing the connection.
        # FIX: the original read ``asyncio.async(self.run())``; ``async``
        # became a reserved keyword in Python 3.7, which makes that a syntax
        # error.  ensure_future() is its exact replacement (Python >= 3.4.4).
        self.worker = asyncio.ensure_future(self.run())
        # In a subclass implementing the opening handshake, the state will be
        # CONNECTING at this point.
        if self.state == 'OPEN':
            self.opening_handshake.set_result(True)

    # Public API

    @property
    def open(self):
        """
        This property is ``True`` when the connection is usable.

        It may be used to handle disconnections gracefully.
        """
        return self.state == 'OPEN'

    @asyncio.coroutine
    def close(self, code=1000, reason=''):
        """
        This coroutine performs the closing handshake.

        This is the expected way to terminate a connection on the server side.

        It waits for the other end to complete the handshake. It doesn't do
        anything once the connection is closed.

        It's usually safe to wrap this coroutine in `asyncio.async()` since
        errors during connection termination aren't particularly useful.

        The `code` must be an :class:`int` and the `reason` a :class:`str`.
        """
        if self.state == 'OPEN':
            # 7.1.2. Start the WebSocket Closing Handshake
            self.close_code, self.close_reason = code, reason
            yield from self.write_frame(OP_CLOSE,
                                        serialize_close(code, reason))
            # 7.1.3. The WebSocket Closing Handshake is Started
            self.state = 'CLOSING'
        # If the connection doesn't terminate within the timeout, break out of
        # the worker loop.
        try:
            yield from asyncio.wait_for(self.worker, timeout=self.timeout)
        except asyncio.TimeoutError:
            self.worker.cancel()
        # The worker should terminate quickly once it has been cancelled.
        yield from self.worker

    @asyncio.coroutine
    def recv(self):
        """
        This coroutine receives the next message.

        It returns a :class:`str` for a text frame and :class:`bytes` for a
        binary frame.

        When the end of the message stream is reached, or when a protocol
        error occurs, :meth:`recv` returns ``None``, indicating that the
        connection is closed.
        """
        # Return any available message
        try:
            return self.messages.get_nowait()
        except QueueEmpty:
            pass
        # Wait for a message until the connection is closed
        next_message = asyncio.Task(self.messages.get())
        done, pending = yield from asyncio.wait(
                [next_message, self.worker],
                return_when=asyncio.FIRST_COMPLETED)
        if next_message in done:
            return next_message.result()
        else:
            # FIX: the worker finished first (connection closed).  Cancel the
            # pending get() task; otherwise it stays alive and could consume
            # a later message that no one will ever see.
            next_message.cancel()

    @asyncio.coroutine
    def send(self, data):
        """
        This coroutine sends a message.

        It sends a :class:`str` as a text frame and :class:`bytes` as a binary
        frame. It raises a :exc:`TypeError` for other inputs and
        :exc:`InvalidState` once the connection is closed.
        """
        if isinstance(data, str):
            opcode = 1
            data = data.encode('utf-8')
        elif isinstance(data, bytes):
            opcode = 2
        else:
            raise TypeError("data must be bytes or str")
        yield from self.write_frame(opcode, data)

    @asyncio.coroutine
    def ping(self, data=None):
        """
        This coroutine sends a ping.

        It returns a Future which will be completed when the corresponding
        pong is received and which you may ignore if you don't want to wait.

        A ping may serve as a keepalive.
        """
        # Protect against duplicates if a payload is explicitly set.
        if data in self.pings:
            raise ValueError("Already waiting for a pong with the same data")
        # Generate a unique random payload otherwise.
        while data is None or data in self.pings:
            data = struct.pack('!I', random.getrandbits(32))
        self.pings[data] = asyncio.Future()
        yield from self.write_frame(OP_PING, data)
        return self.pings[data]

    @asyncio.coroutine
    def pong(self, data=b''):
        """
        This coroutine sends a pong.

        An unsolicited pong may serve as a unidirectional heartbeat.
        """
        yield from self.write_frame(OP_PONG, data)

    # Private methods - no guarantees.

    @asyncio.coroutine
    def run(self):
        # This coroutine guarantees that the connection is closed at exit.
        yield from self.opening_handshake
        while not self.closing_handshake.done():
            try:
                msg = yield from self.read_message()
                if msg is None:
                    break
                self.messages.put_nowait(msg)
            except asyncio.CancelledError:
                break
            except WebSocketProtocolError:
                yield from self.fail_connection(1002)
            except UnicodeDecodeError:
                yield from self.fail_connection(1007)
            except Exception:
                yield from self.fail_connection(1011)
                raise
        yield from self.close_connection()

    @asyncio.coroutine
    def read_message(self):
        # Reassemble fragmented messages.
        frame = yield from self.read_data_frame()
        if frame is None:
            return
        if frame.opcode == OP_TEXT:
            text = True
        elif frame.opcode == OP_BINARY:
            text = False
        else:   # frame.opcode == OP_CONT
            raise WebSocketProtocolError("Unexpected opcode")
        # Shortcut for the common case - no fragmentation
        if frame.fin:
            return frame.data.decode('utf-8') if text else frame.data
        # 5.4. Fragmentation
        chunks = []
        if text:
            decoder = codecs.getincrementaldecoder('utf-8')(errors='strict')
            append = lambda f: chunks.append(decoder.decode(f.data, f.fin))
        else:
            append = lambda f: chunks.append(f.data)
        append(frame)
        while not frame.fin:
            frame = yield from self.read_data_frame()
            if frame is None:
                raise WebSocketProtocolError("Incomplete fragmented message")
            if frame.opcode != OP_CONT:
                raise WebSocketProtocolError("Unexpected opcode")
            append(frame)
        return ('' if text else b'').join(chunks)

    @asyncio.coroutine
    def read_data_frame(self):
        # Deal with control frames automatically and return next data frame.
        # 6.2. Receiving Data
        while True:
            frame = yield from self.read_frame()
            # 5.5. Control Frames
            if frame.opcode == OP_CLOSE:
                self.close_code, self.close_reason = parse_close(frame.data)
                if self.state != 'CLOSING':
                    # 7.1.3. The WebSocket Closing Handshake is Started
                    self.state = 'CLOSING'
                    yield from self.write_frame(OP_CLOSE, frame.data,
                                                'CLOSING')
                self.closing_handshake.set_result(True)
                return
            elif frame.opcode == OP_PING:
                # Answer pings.
                yield from self.pong(frame.data)
            elif frame.opcode == OP_PONG:
                # Do not acknowledge pings on unsolicited pongs.
                if frame.data in self.pings:
                    # Acknowledge all pings up to the one matching this pong.
                    # popitem(0) -> last=False: pop in FIFO order.
                    ping_id = None
                    while ping_id != frame.data:
                        ping_id, waiter = self.pings.popitem(0)
                        if not waiter.cancelled():
                            waiter.set_result(None)
            # 5.6. Data Frames
            else:
                return frame

    @asyncio.coroutine
    def read_frame(self):
        # Frames from a client are masked; frames from a server are not.
        is_masked = not self.is_client
        frame = yield from read_frame(self.reader.readexactly, is_masked)
        side = 'client' if self.is_client else 'server'
        logger.debug("%s << %s", side, frame)
        return frame

    @asyncio.coroutine
    def write_frame(self, opcode, data=b'', expected_state='OPEN'):
        # This may happen if a user attempts to write on a closed connection.
        if self.state != expected_state:
            raise InvalidState("Cannot write to a WebSocket "
                               "in the {} state".format(self.state))
        frame = Frame(True, opcode, data)
        side = 'client' if self.is_client else 'server'
        logger.debug("%s >> %s", side, frame)
        is_masked = self.is_client
        write_frame(frame, self.writer.write, is_masked)
        # Handle flow control automatically.
        try:
            yield from self.writer.drain()
        except ConnectionResetError:
            # The socket died; connection_lost will clean up.
            pass

    @asyncio.coroutine
    def close_connection(self):
        # 7.1.1. Close the WebSocket Connection
        if self.state == 'CLOSED':
            return
        # Defensive assertion for protocol compliance.
        if self.state != 'CLOSING':                         # pragma: no cover
            raise InvalidState("Cannot close a WebSocket connection "
                               "in the {} state".format(self.state))
        if self.is_client:
            # Give the server a chance to close the TCP connection first.
            try:
                yield from asyncio.wait_for(self.connection_closed,
                                            timeout=self.timeout)
            except (asyncio.CancelledError, asyncio.TimeoutError):
                pass
            if self.state == 'CLOSED':
                return
        if self.writer.can_write_eof():
            self.writer.write_eof()
        self.writer.close()
        try:
            yield from asyncio.wait_for(self.connection_closed,
                                        timeout=self.timeout)
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass

    @asyncio.coroutine
    def fail_connection(self, code=1011, reason=''):
        # Losing the connection usually results in a protocol error.
        # Preserve the original error code in this case.
        if self.close_code != 1006:
            self.close_code, self.close_reason = code, reason
        # 7.1.7. Fail the WebSocket Connection
        logger.info("Failing the WebSocket connection: %d %s", code, reason)
        if self.state == 'OPEN':
            yield from self.write_frame(OP_CLOSE, serialize_close(code, reason))
        self.state = 'CLOSING'
        if not self.closing_handshake.done():
            self.closing_handshake.set_result(False)
        yield from self.close_connection()

    # asyncio StreamReaderProtocol methods

    def client_connected(self, reader, writer):
        self.reader = reader
        self.writer = writer

    def connection_lost(self, exc):
        # 7.1.4. The WebSocket Connection is Closed
        self.state = 'CLOSED'
        if not self.connection_closed.done():
            self.connection_closed.set_result(None)
        # 1006 = abnormal closure: the close frame was never received.
        if self.close_code is None:
            self.close_code = 1006
        super().connection_lost(exc)
class IRCClient:
    """Higher-level IRC client.  Takes care of most of the hard parts of IRC:
    incoming server messages are bundled into more intelligible events (see
    ``dywypi.event``), and commands that expect replies are implemented as
    coroutines.
    """

    def __init__(self, loop, network):
        self.loop = loop
        self.network = network
        # TODO should this be a param?  a property of the network?  or, more
        # likely, channel-specific and decoded separately and...
        self.charset = "utf8"

        self.joined_channels = {}   # name => Channel

        # IRC server features, as reported by ISUPPORT, with defaults taken
        # from the RFC.
        self.len_nick = 9
        self.len_channel = 200
        self.len_message = 510
        # These lengths don't have limits mentioned in the RFC, so going with
        # the smallest known values in the wild
        self.len_kick = 80
        self.len_topic = 80
        self.len_away = 160
        self.max_watches = 0
        self.max_targets = 1
        self.channel_types = set("#&")
        self.channel_modes = {}     # TODO, haha.
        self.channel_prefixes = {}  # TODO here too.  IRCMode is awkward.
        self.network_title = self.network.name
        self.features = {}

        # Various intermediate state used for waiting for replies and
        # aggregating multi-part replies
        # TODO hmmm so what happens if state just gets left here forever?  do
        # we care?
        self._pending_names = {}
        self._names_futures = {}
        self._pending_topics = {}
        self._join_futures = {}
        self._message_waiters = deque()

        self.read_queue = Queue(loop=loop)

    def get_channel(self, channel_name):
        """Returns a `Channel` object containing everything the client
        definitively knows about the given channel.

        Note that if you, say, ask for the topic of a channel you aren't in
        and then immediately call `get_channel`, the returned object won't
        have its topic populated.  State is only tracked persistently for
        channels the bot is in; otherwise there's no way to know whether or
        not it's stale.
        """
        if channel_name in self.joined_channels:
            return self.joined_channels[channel_name]
        else:
            return IRCChannel(self, channel_name)

    @asyncio.coroutine
    def connect(self):
        """Coroutine for connecting to a single server.

        Note that this will nonblock until the client is "registered", defined
        as the first PING/PONG exchange.
        """
        # TODO this is a poor excuse for round-robin :)
        server = self.current_server = self.network.servers[0]

        # TODO i'm pretty sure the server tells us what our nick is, and we
        # should believe that instead
        self.nick = self.network.preferred_nick

        # TODO: handle disconnection, somehow.  probably affects a lot of
        # things.
        self._reader, self._writer = yield from server.connect(self.loop)

        if server.password:
            self.send_message("PASS", server.password)
        self.send_message("NICK", self.nick)
        self.send_message("USER", "dywypi", "-", "-", "dywypi Python IRC bot")

        # Start the reader loop, or we can't respond to anything.
        # FIX: the original wrapped the Task in ``asyncio.async(...)``, which
        # is redundant (Task is already scheduled) and a syntax error on
        # Python >= 3.7 where ``async`` is a keyword; ensure_future() is the
        # direct replacement.
        self._read_loop_task = asyncio.ensure_future(
            self._start_read_loop(), loop=self.loop)

    @asyncio.coroutine
    def disconnect(self):
        # Quit
        self.send_message("QUIT", "Seeya!")

        # Flush the write buffer
        yield from self._writer.drain()
        self._writer.close()

        # Stop reading events
        self._read_loop_task.cancel()
        # This looks a little funny since this task is already running, but we
        # want to block until it's actually done, which might require dipping
        # back into the event loop
        yield from self._read_loop_task

        # Read until the connection closes
        while not self._reader.at_eof():
            yield from self._reader.readline()

    @asyncio.coroutine
    def _start_read_loop(self):
        """Internal coroutine that just keeps reading from the server in a
        loop.  Called once after a connect and should never be called again
        after that.
        """
        # TODO this is currently just to keep the message queue going, but
        # eventually it should turn them into events and stuff them in an
        # event queue
        while not self._reader.at_eof():
            try:
                yield from self._read_message()
            except CancelledError:
                return
            except Exception:
                log.exception("Smothering exception in IRC read loop")

    @asyncio.coroutine
    def gather_messages(self, *start, finish):
        """Wait for a sequence of replies: `start` commands accumulate,
        `finish` commands (keyword-only) complete the reply.  Returns the
        collected list of messages.
        """
        fut = asyncio.Future()
        messages = {}
        for command in start:
            messages[command] = False
        for command in finish:
            messages[command] = True
        collected = []
        self._message_waiters.append((fut, messages, collected))
        yield from fut
        return collected

    def _possibly_gather_message(self, message):
        """Feed an incoming message to the oldest pending gather_messages()
        call, if it looks like part of that call's expected reply."""
        if not self._message_waiters:
            return

        # TODO there is a general ongoing problem here with matching up
        # responses.  ESPECIALLY when error codes are possible.  something
        # here is gonna have to get a bit fancier.
        fut, waiting_on, collected = self._message_waiters[0]
        # TODO is it possible for even a PING to appear in the middle of
        # some other response?
        # TODO this is still susceptible to weirdness when there's, say, a
        # queued error response to a PRIVMSG on its way back; it'll look
        # like the call we just made failed, and all the real responses
        # will be dropped.  can we assume some set of error replies ONLY
        # happen in response to sending a message of some kind, maybe?
        # TODO for that matter, where does the error response to a PRIVMSG
        # even go?  the whole problem is that we can't know for sure when
        # it succeeded, unless we put a timeout on every call to say()
        finish = False
        if message.command in waiting_on:
            finish = waiting_on[message.command]
        elif message.is_error:
            # Always consider an error as finishing
            # TODO but we might have gotten this error in response to
            # something else we did before this message...  :S
            if message.command in {"ERR_CANNOTSENDTOCHAN"}:
                # Looks like a PRIVMSG error or similar, so probably not a
                # response to this particular message.
                return
            finish = True
        elif not collected:
            # Got a regular response we weren't expecting, AND this future
            # hasn't started collecting yet -- the response probably just
            # hasn't started coming back yet, so don't do anything yet.
            return

        # If we get here, we expected this response, and should keep
        # feeding into this future.
        collected.append(message)

        if finish:
            # Done, one way or another
            self._message_waiters.popleft()
            if message.is_error:
                fut.set_exception(IRCError(message))
            else:
                fut.set_result(collected)

    @asyncio.coroutine
    def _read_message(self):
        """Internal dispatcher for messages received from the server."""
        line = yield from self._reader.readline()
        assert line.endswith(b"\r\n")
        line = line[:-2]

        # TODO valerr, unicodeerr
        message = IRCMessage.parse(line.decode(self.charset))
        log.debug("recv: %r", message)

        # TODO unclear whether this should go before or after _handle_foo
        self._possibly_gather_message(message)

        # Boy do I ever hate this pattern but it's slightly more maintainable
        # than a 500-line if tree.
        handler = getattr(self, "_handle_" + message.command, None)
        event = None
        if handler:
            event = handler(message)
        self.read_queue.put_nowait((message, event))

    def _handle_PING(self, message):
        # PONG
        self.send_message("PONG", message.args[-1])

    def _handle_RPL_WELCOME(self, message):
        # Initial registration: do autojoins, and any other onconnect work
        for channel_name in self.network.autojoins:
            # FIX: was ``asyncio.async(...)`` -- keyword since Python 3.7.
            asyncio.ensure_future(self.join(channel_name), loop=self.loop)

    def _handle_RPL_ISUPPORT(self, message):
        me, *features, human_text = message.args
        for feature_string in features:
            feature, sep, value = feature_string.partition("=")
            # FIX: str.partition() returns '' (never None) when "=" is
            # absent, so the original ``if value is None`` check could never
            # fire and bare flags were stored as ''.  Test the separator
            # instead so a valueless feature is recorded as True.
            if not sep:
                value = True
            self.features[feature] = value

            if feature == "NICKLEN":
                self.len_nick = int(value)
            elif feature == "CHANNELLEN":
                self.len_channel = int(value)
            elif feature == "KICKLEN":
                self.len_kick = int(value)
            elif feature == "TOPICLEN":
                self.len_topic = int(value)
            elif feature == "AWAYLEN":
                self.len_away = int(value)
            elif feature == "WATCH":
                self.max_watches = int(value)
            elif feature == "CHANTYPES":
                self.channel_types = set(value)
            elif feature == "PREFIX":
                # List of channel user modes, in relative priority order, in
                # the format (ov)@+
                assert value[0] == "("
                letters, symbols = value[1:].split(")")
                assert len(letters) == len(symbols)
                self.channel_prefixes.clear()
                for letter, symbol in zip(letters, symbols):
                    mode = IRCMode(letter, prefix=symbol)
                    self.channel_modes[letter] = mode
                    self.channel_prefixes[symbol] = mode
            elif feature == "MAXTARGETS":
                self.max_targets = int(value)
            elif feature == "CHANMODES":
                # Four groups delimited by lists: list-style (+b), arg
                # required (+k), arg required only to set (+l), argless
                lists, args, argsets, argless = value.split(",")
                for letter in lists:
                    self.channel_modes[letter] = IRCMode(letter, multi=True)
                for letter in args:
                    self.channel_modes[letter] = IRCMode(
                        letter, arg_on_set=True, arg_on_remove=True)
                for letter in argsets:
                    self.channel_modes[letter] = IRCMode(
                        letter, arg_on_set=True)
                for letter in argless:
                    self.channel_modes[letter] = IRCMode(letter)
            elif feature == "NETWORK":
                self.network_title = value

    def _handle_JOIN(self, message):
        channel_name, = message.args
        joiner = Peer.from_prefix(message.prefix)
        # TODO should there be a self.me?  how...
        if joiner.name == self.nick:
            # We just joined a channel
            # assert channel_name not in self.joined_channels
            # TODO key?  do we care?
            # TODO what about channel configuration and anon non-joined
            # channels?  how do these all relate...
            channel = IRCChannel(self, channel_name)
            self.joined_channels[channel.name] = channel
        else:
            # Someone else just joined the channel
            self.joined_channels[channel_name].add_user(joiner)

    def _handle_RPL_TOPIC(self, message):
        # Topic.  Sent when joining or when requesting the topic.
        # TODO this doesn't handle the "requesting" part
        # TODO what if me != me?
        me, channel_name, topic_text = message.args
        self._pending_topics[channel_name] = IRCTopic(topic_text)

    def _handle_RPL_TOPICWHOTIME(self, message):
        # Topic author (NONSTANDARD).  Sent after RPL_TOPIC.
        # Unfortunately, there's no way to know whether to expect this.
        # TODO this doesn't handle the "requesting" part
        # TODO what if me != me?
        me, channel_name, author, timestamp = message.args
        topic = self._pending_topics.setdefault(channel_name, IRCTopic(""))
        topic.author = Peer.from_prefix(author)
        topic.timestamp = datetime.utcfromtimestamp(int(timestamp))

    def _handle_RPL_NAMREPLY(self, message):
        # Names response.  Sent when joining or when requesting a names
        # list.  Must be ended with a RPL_ENDOFNAMES.
        me, useless_equals_sign, channel_name, *raw_names = message.args

        # List of names is actually optional (?!)
        if raw_names:
            raw_names = raw_names[0]
        else:
            raw_names = ""

        names = raw_names.strip(" ").split(" ")
        namelist = self._pending_names.setdefault(channel_name, [])
        # TODO modes?  should those be stripped off here?
        # TODO for that matter should these become peers here?
        namelist.extend(names)

    def _handle_RPL_ENDOFNAMES(self, message):
        # End of names list.  Sent at the very end of a join or the very
        # end of a NAMES request.
        me, channel_name, info = message.args
        namelist = self._pending_names.pop(channel_name, [])

        if channel_name in self._names_futures:
            # TODO we should probably not ever have a names future AND a
            # pending join at the same time.  or, does it matter?
            self._names_futures[channel_name].set_result(namelist)
            del self._names_futures[channel_name]

        if channel_name in self.joined_channels:
            # Join synchronized!
            channel = self.joined_channels[channel_name]
            channel.sync = True

            channel.topic = self._pending_topics.pop(channel_name, None)

            for name in namelist:
                modes = set()
                # TODO use features!
                while name and name[0] in "+%@&~":
                    modes.add(name[0])
                    name = name[1:]

                # TODO haha no this is so bad.
                # TODO the bot should, obviously, keep a record of all
                # known users as well.  alas, mutable everything.
                peer = Peer(name, None, None)

                channel.add_user(peer, modes)

            if channel_name in self._join_futures:
                # Update the Future
                self._join_futures[channel_name].set_result(channel)
                del self._join_futures[channel_name]

    def _handle_PRIVMSG(self, message):
        # PRIVMSG target :text
        target_name, text = message.args

        source = Peer.from_prefix(message.prefix)

        if target_name[0] in self.channel_types:
            target = self.get_channel(target_name)
            cls = PublicMessage
        else:
            # TODO this is /us/, so, surely ought to be known
            target = Peer(target_name, None, None)
            cls = PrivateMessage

        return cls(source, target, text, client=self, raw=message)

    @asyncio.coroutine
    def read_event(self):
        """Produce a single IRC event.

        This client does not do any kind of multiplexing or event handler
        notification; that's left to a higher level.
        """
        message, event = yield from self.read_queue.get()
        return event

    # Implementations of particular commands

    # TODO should these be part of the general client interface, or should
    # there be a separate thing that smooths out the details?
    @asyncio.coroutine
    def whois(self, target):
        """Coroutine that queries for information about a target."""
        self.send_message("WHOIS", target)
        messages = yield from self.gather_messages(
            "RPL_WHOISUSER",
            "RPL_WHOISSERVER",
            "RPL_WHOISOPERATOR",
            "RPL_WHOISIDLE",
            "RPL_WHOISCHANNELS",
            "RPL_WHOISVIRT",
            "RPL_WHOIS_HIDDEN",
            "RPL_WHOISSPECIAL",
            "RPL_WHOISSECURE",
            "RPL_WHOISSTAFF",
            "RPL_WHOISLANGUAGE",
            finish=[
                "RPL_ENDOFWHOIS",
                "ERR_NOSUCHSERVER",
                "ERR_NONICKNAMEGIVEN",
                "ERR_NOSUCHNICK",
            ],
        )
        # nb: The first two args for all the responses are our nick and the
        # target's nick.
        # TODO apparently you can whois multiple nicks at a time
        # NOTE(review): the locals assigned below (ident, hostname, realname,
        # idle, channels, server, server_desc) are never used; the raw
        # message list is returned as-is.  Presumably a structured result was
        # planned here -- confirm before relying on the return shape.
        for message in messages:
            if message.command == "RPL_WHOISUSER":
                ident = message.args[2]
                hostname = message.args[3]
                # args[4] is a literal *
                realname = message.args[5]
            elif message.command == "RPL_WHOISIDLE":
                # Idle time.  Some servers (at least, inspircd) also have
                # signon time as unixtime.
                idle = timedelta(seconds=int(message.args[2]))
            elif message.command == "RPL_WHOISCHANNELS":
                # TODO split and parse out the usermodes
                # TODO don't some servers have an extension with multiple
                # modes here
                channels = message.args[2]
            elif message.command == "RPL_WHOISSERVER":
                server = message.args[2]
                server_desc = message.args[3]

        return messages

    @asyncio.coroutine
    def say(self, target, message):
        """Coroutine that sends a message to a target, which may be either a
        `Channel` or a `Peer`.
        """
        self.send_message("PRIVMSG", target, message)

    @asyncio.coroutine
    def join(self, channel_name, key=None):
        """Coroutine that joins a channel, and nonblocks until the join is
        "synchronized" (defined as receiving the nick list).
        """
        if channel_name in self._join_futures:
            return self._join_futures[channel_name]

        # TODO multiple?  error on commas?
        if key is None:
            self.send_message("JOIN", channel_name)
        else:
            self.send_message("JOIN", channel_name, key)

        # Clear out any lingering names list
        self._pending_names[channel_name] = []

        # Return a Future, to be populated by the message loop
        # (RPL_ENDOFNAMES handling resolves it with the synced channel).
        fut = self._join_futures[channel_name] = asyncio.Future()
        return fut

    @asyncio.coroutine
    def names(self, channel_name):
        """Coroutine that returns a list of names in a channel."""
        # TODO there's some ISUPPORT extension that lists /all/ channel modes
        # on each name that comes back...  support that?
        self.send_message("NAMES", channel_name)

        # No need to do the same thing twice
        if channel_name in self._names_futures:
            return self._names_futures[channel_name]

        # Clear out any lingering names list
        self._pending_names[channel_name] = []

        # Return a Future, to be populated by the message loop
        fut = self._names_futures[channel_name] = asyncio.Future()
        return fut

    def set_topic(self, channel, topic):
        """Sets the channel topic."""
        self.send_message("TOPIC", channel, topic)

    # TODO unclear whether this stuff should be separate or what; it's less
    # about the protocol and more about the dywypi interface
    def send_message(self, command, *args):
        # Render the message to the wire format and write it out with the
        # IRC line terminator.
        message = IRCMessage(command, *args)
        log.debug("sent: %r", message)
        self._writer.write(message.render().encode(self.charset) + b"\r\n")

    def format_transition(self, current_style, new_style):
        # Emit the minimal mIRC control-code sequence that switches the
        # output from current_style to new_style.
        if new_style == Style.default():
            # Reset code, ^O
            return "\x0f"

        if new_style.fg != current_style.fg and new_style.fg is Color.default:
            # IRC has no "reset to default" code.  mIRC claims color 99 is
            # for this, but it lies, at least in irssi.  So we must reset and
            # reapply everything.
            ret = "\x0f"
            if new_style.bold is Bold.on:
                ret += "\x02"
            return ret

        ret = ""
        if new_style.fg != current_style.fg:
            ret += FOREGROUND_CODES[new_style.fg]

        if new_style.bold != current_style.bold:
            # There's no on/off for bold, just a toggle (^B)
            ret += "\x02"

        return ret
class Dumper(BaseDumper):
    """Index dumper: crawls same-host links starting from the base URL and
    downloads each page it discovers."""

    def __init__(self, url: str, outdir: str, **kwargs):
        super(Dumper, self).__init__(url, outdir, **kwargs)
        self.netloc = urlparse(url).netloc
        # FIX: was a list; membership is tested for every discovered URL, so
        # a list made the crawl O(n^2).  A set keeps the check O(1).
        self.fetched_urls = set()
        self.task_count = 10    # number of concurrent worker coroutines
        self.running = False

    async def start(self):
        """Entry point: seed the queue and run the worker tasks to completion."""
        # The queue must be created inside the running event loop, see
        # https://stackoverflow.com/questions/53724665/using-queues-results-in-asyncio-exception-got-future-future-pending-attached
        self.targets_q = Queue()    # items are (url, name) tuples
        await self.targets_q.put((self.url, "index"))
        self.running = True
        tasks = [asyncio.create_task(self.dump())
                 for _ in range(self.task_count)]
        for t in tasks:
            await t
        self.running = False

    async def dump(self):
        """Core worker: pull targets off the queue, download them, and queue
        any same-host links found in HTML pages."""
        while self.running:
            # queue.get() would block forever on an empty queue, so use a
            # timeout to let idle workers exit.  (get_nowait would degrade
            # the crawl into effectively a single task.)
            try:
                url, name = await asyncio.wait_for(self.targets_q.get(), 7)
            except Exception as e:
                # Timed out waiting for work: this worker is done.
                break
            if url in self.fetched_urls:
                continue
            # Download and save the target.
            await self.download((url, name))
            self.fetched_urls.add(url)
            # If the target is an HTML page, extract further links from it.
            if await self.is_html(url):
                async with aiohttp.ClientSession(
                        connector=self.connector,
                        timeout=self.timeout) as session:
                    try:
                        async with session.get(url,
                                               headers=self.headers) as resp:
                            d = pq(await resp.text())
                            # Walk every anchor on the page.
                            for a in d("a"):
                                txt = pq(a).text()
                                href = pq(a).attr("href")
                                # Skip anchors with no text or no link.
                                if not txt or not href:
                                    continue
                                href_parsed = urlparse(href)
                                if href_parsed.netloc:
                                    # Skip links to other hosts.
                                    if href_parsed.netloc != self.netloc:
                                        continue
                                if href_parsed.scheme:
                                    # Skip non-http(s) schemes.
                                    if not href_parsed.scheme.startswith(
                                            "http"):
                                        continue
                                new_url = urljoin(url, href_parsed.path)
                                fullname = urlparse(new_url).path.lstrip("/")
                                await self.targets_q.put((new_url, fullname))
                    except Exception as e:
                        msg = "Failed to dump url %s" % url
                        self.error_log(msg=msg, e=e)
                    # NOTE: the redundant ``finally: await session.close()``
                    # was removed -- ``async with`` already closes the
                    # session on exit.
            if self.targets_q.empty():
                break

    async def is_html(self, url) -> bool:
        """Report whether the target URL serves an HTML page (via HEAD).

        Returns False when the request fails.
        """
        async with aiohttp.ClientSession(
                connector=self.connector, timeout=self.timeout) as session:
            try:
                async with session.head(url, headers=self.headers) as resp:
                    return bool(
                        "html" in resp.headers.get("content-type", ""))
            except Exception as e:
                msg = "Failed to dump url %s" % url
                self.error_log(msg=msg, e=e)
                # FIX: was an implicit None; return an explicit bool to
                # match the annotated return type (both are falsy, so the
                # caller's behavior is unchanged).
                return False
class IRCClient: """Higher-level IRC client. Takes care of most of the hard parts of IRC: incoming server messages are bundled into more intelligible events (see ``dywypi.event``), and commands that expect replies are implemented as coroutines. """ def __init__(self, loop, network): self.loop = loop self.network = network # TODO should this be a param? a property of the network? or, more # likely, channel-specific and decoded separately and... self.charset = 'utf8' self.joined_channels = {} # name => Channel # IRC server features, as reported by ISUPPORT, with defaults taken # from the RFC. self.len_nick = 9 self.len_channel = 200 self.len_message = 510 # These lengths don't have limits mentioned in the RFC, so going with # the smallest known values in the wild self.len_kick = 80 self.len_topic = 80 self.len_away = 160 self.max_watches = 0 self.max_targets = 1 self.channel_types = set('#&') self.channel_modes = {} # TODO, haha. self.channel_prefixes = {} # TODO here too. IRCMode is awkward. self.network_title = self.network.name self.features = {} # Various intermediate state used for waiting for replies and # aggregating multi-part replies # TODO hmmm so what happens if state just gets left here forever? do # we care? self._pending_names = {} self._names_futures = {} self._pending_topics = {} self._join_futures = {} self._message_waiters = OrderedDict() self.read_queue = Queue(loop=loop) def get_channel(self, channel_name): """Returns a `Channel` object containing everything the client definitively knows about the given channel. Note that if you, say, ask for the topic of a channel you aren't in and then immediately call `get_channel`, the returned object won't have its topic populated. State is only tracked persistently for channels the bot is in; otherwise there's no way to know whether or not it's stale. 
""" if channel_name in self.joined_channels: return self.joined_channels[channel_name] else: return IRCChannel(self, channel_name) @asyncio.coroutine def connect(self): """Coroutine for connecting to a single server. Note that this will nonblock until the client is "registered", defined as the first PING/PONG exchange. """ # TODO this is a poor excuse for round-robin :) server = self.current_server = self.network.servers[0] # TODO i'm pretty sure the server tells us what our nick is, and we # should believe that instead self.nick = self.network.preferred_nick # TODO: handle disconnection, somehow. probably affects a lot of # things. self._reader, self._writer = yield from server.connect(self.loop) log.debug('connected!') if server.password: self.send_message('PASS', server.password) self.send_message('NICK', self.nick) self.send_message('USER', 'dywypi', '-', '-', 'dywypi Python IRC bot') # Start the reader loop, or we can't respond to anything self._read_loop_task = asyncio.Task(self._start_read_loop()) asyncio.async(self._read_loop_task, loop=self.loop) @asyncio.coroutine def disconnect(self): # Quit self.send_message('QUIT', 'Seeya!') # Flush the write buffer yield from self._writer.drain() self._writer.close() # Stop reading events self._read_loop_task.cancel() # This looks a little funny since this task is already running, but we # want to block until it's actually done, which might require dipping # back into the event loop yield from self._read_loop_task # Read until the connection closes while not self._reader.at_eof(): yield from self._reader.readline() @asyncio.coroutine def _start_read_loop(self): """Internal coroutine that just keeps reading from the server in a loop. Called once after a connect and should never be called again after that. 
""" # TODO this is currently just to keep the message queue going, but # eventually it should turn them into events and stuff them in an event # queue while not self._reader.at_eof(): try: yield from self._read_message() except CancelledError: return except Exception: log.exception("Smothering exception in IRC read loop") @asyncio.coroutine def gather_messages(self, *middle, end, errors=()): fut = asyncio.Future() messages = {} for command in middle: messages[command] = 'middle' for command in end: messages[command] = 'end' for command in errors: messages[command] = 'error' collected = [] self._message_waiters[fut] = (messages, collected) yield from fut return collected @asyncio.coroutine def _read_message(self): """Internal dispatcher for messages received from the server.""" line = yield from self._reader.readline() assert line.endswith(b'\r\n') line = line[:-2] # TODO valerr, unicodeerr message = IRCMessage.parse(line.decode(self.charset)) log.debug("recv: %r", message) # TODO there is a general ongoing problem here with matching up # responses. ESPECIALLY when error codes are possible. something here # is gonna have to get a bit fancier. for fut, (waiting_on, collected) in self._message_waiters.items(): # TODO this needs to handle error codes too, or the future will # linger forever! potential problem: if the server is lagging # behind us, an error code might actually map to a privmsg we tried # to send (which has no success response) and we'll get all f****d # up. i don't know if there's any way to solve this. # TODO hey stupid question: after we've seen ANY of the waited-on # messages, should we pipe all subsequent messages into that future # until we see the one that's supposed to end it? something like # a forced JOIN could screw up a join attempt, for example, but if # we're getting RPL_TOPIC when we didn't actually ask for the # topic, THEN we know we're definitely in the join sequence. 
# TODO also given normal irc response flow, i'm pretty sure we # should only ever need to check the first pending future. there's # no way we should need to skip around. # TODO maybe give these a timeout so a bad one doesn't f**k us up # forever if message.command in waiting_on: collected.append(message) if waiting_on[message.command] == 'end': fut.set_result(collected) del self._message_waiters[fut] elif waiting_on[message.command] == 'error': fut.set_exception(IRCError(message)) del self._message_waiters[fut] break # Boy do I ever hate this pattern but it's slightly more maintainable # than a 500-line if tree. handler = getattr(self, '_handle_' + message.command, None) event = None if handler: event = handler(message) self.read_queue.put_nowait((message, event)) def _handle_PING(self, message): # PONG self.send_message('PONG', message.args[-1]) def _handle_RPL_WELCOME(self, message): # Initial registration: do autojoins, and any other onconnect work self.network.hostname = message.args[1].rsplit(sep='@')[-1] for channel_name in self.network.autojoins: asyncio.async(self.join(channel_name), loop=self.loop) def _handle_RPL_ISUPPORT(self, message): me, *features, human_text = message.args for feature_string in features: feature, _, value = feature_string.partition('=') if value is None: value = True self.features[feature] = value if feature == 'NICKLEN': self.len_nick = int(value) elif feature == 'CHANNELLEN': self.len_channel = int(value) elif feature == 'KICKLEN': self.len_kick = int(value) elif feature == 'TOPICLEN': self.len_topic = int(value) elif feature == 'AWAYLEN': self.len_away = int(value) elif feature == 'WATCH': self.max_watches = int(value) elif feature == 'CHANTYPES': self.channel_types = set(value) elif feature == 'PREFIX': # List of channel user modes, in relative priority order, in # the format (ov)@+ assert value[0] == '(' letters, symbols = value[1:].split(')') assert len(letters) == len(symbols) self.channel_prefixes.clear() for letter, symbol 
in zip(letters, symbols): mode = IRCMode(letter, prefix=symbol) self.channel_modes[letter] = mode self.channel_prefixes[symbol] = mode elif feature == 'MAXTARGETS': self.max_targets = int(value) elif feature == 'CHANMODES': # Four groups delimited by lists: list-style (+b), arg required # (+k), arg required only to set (+l), argless lists, args, argsets, argless = value.split(',') for letter in lists: self.channel_modes[letter] = IRCMode( letter, multi=True) for letter in args: self.channel_modes[letter] = IRCMode( letter, arg_on_set=True, arg_on_remove=True) for letter in argsets: self.channel_modes[letter] = IRCMode( letter, arg_on_set=True) for letter in argless: self.channel_modes[letter] = IRCMode(letter) elif feature == 'NETWORK': self.network_title = value def _handle_JOIN(self, message): channel_name, = message.args joiner = Peer.from_prefix(message.prefix) # TODO should there be a self.me? how... if joiner.name == self.nick: # We just joined a channel #assert channel_name not in self.joined_channels # TODO key? do we care? # TODO what about channel configuration and anon non-joined # channels? how do these all relate... channel = IRCChannel(self, channel_name) self.joined_channels[channel.name] = channel else: # Someone else just joined the channel self.joined_channels[channel_name].add_user(joiner) def _handle_RPL_TOPIC(self, message): # Topic. Sent when joining or when requesting the topic. # TODO this doesn't handle the "requesting" part # TODO what if me != me? me, channel_name, topic_text = message.args self._pending_topics[channel_name] = IRCTopic(topic_text) def _handle_RPL_TOPICWHOTIME(self, message): # Topic author (NONSTANDARD). Sent after RPL_TOPIC. # Unfortunately, there's no way to know whether to expect this. # TODO this doesn't handle the "requesting" part # TODO what if me != me? 
me, channel_name, author, timestamp = message.args topic = self._pending_topics.setdefault(channel_name, IRCTopic('')) topic.author = Peer.from_prefix(author) topic.timestamp = datetime.utcfromtimestamp(int(timestamp)) def _handle_RPL_NAMREPLY(self, message): # Names response. Sent when joining or when requesting a names # list. Must be ended with a RPL_ENDOFNAMES. me, useless_equals_sign, channel_name, *raw_names = message.args # List of names is actually optional (?!) if raw_names: raw_names = raw_names[0] else: raw_names = '' names = raw_names.strip(' ').split(' ') namelist = self._pending_names.setdefault(channel_name, []) # TODO modes? should those be stripped off here? # TODO for that matter should these become peers here? namelist.extend(names) def _handle_RPL_ENDOFNAMES(self, message): # End of names list. Sent at the very end of a join or the very # end of a NAMES request. me, channel_name, info = message.args namelist = self._pending_names.pop(channel_name, []) if channel_name in self._names_futures: # TODO we should probably not ever have a names future AND a # pending join at the same time. or, does it matter? self._names_futures[channel_name].set_result(namelist) del self._names_futures[channel_name] if channel_name in self.joined_channels: # Join synchronized! channel = self.joined_channels[channel_name] channel.sync = True channel.topic = self._pending_topics.pop(channel_name, None) for name in namelist: modes = set() # TODO use features! while name and name[0] in '+%@&~': modes.add(name[0]) name = name[1:] # TODO haha no this is so bad. # TODO the bot should, obviously, keep a record of all # known users as well. alas, mutable everything. 
peer = Peer(name, None, None) channel.add_user(peer, modes) if channel_name in self._join_futures: # Update the Future self._join_futures[channel_name].set_result(channel) del self._join_futures[channel_name] def _handle_PRIVMSG(self, message): # PRIVMSG target :text target_name, text = message.args source = Peer.from_prefix(message.prefix) if target_name[0] in self.channel_types: target = self.get_channel(target_name) cls = PublicMessage else: # TODO this is /us/, so, surely ought to be known target = Peer(target_name, None, None) cls = PrivateMessage return cls(source, target, text, client=self, raw=message) @asyncio.coroutine def read_event(self): """Produce a single IRC event. This client does not do any kind of multiplexing or event handler notification; that's left to a higher level. """ message, event = yield from self.read_queue.get() return event # Implementations of particular commands # TODO should these be part of the general client interface, or should # there be a separate thing that smooths out the details? @asyncio.coroutine def whois(self, target): """Coroutine that queries for information about a target.""" self.send_message('WHOIS', target) messages = yield from self.gather_messages( 'RPL_WHOISUSER', 'RPL_WHOISSERVER', 'RPL_WHOISOPERATOR', 'RPL_WHOISIDLE', 'RPL_WHOISCHANNELS', 'RPL_WHOISVIRT', 'RPL_WHOIS_HIDDEN', 'RPL_WHOISSPECIAL', 'RPL_WHOISSECURE', 'RPL_WHOISSTAFF', 'RPL_WHOISLANGUAGE', end=[ 'RPL_ENDOFWHOIS', ], errors=[ 'ERR_NOSUCHSERVER', 'ERR_NONICKNAMEGIVEN', 'ERR_NOSUCHNICK', ], ) # nb: The first two args for all the responses are our nick and the # target's nick. # TODO apparently you can whois multiple nicks at a time for message in messages: if message.command == 'RPL_WHOISUSER': ident, hostname ident = message.args[2] hostname = message.args[3] # args[4] is a literal * realname = message.args[5] elif message.command == 'RPL_WHOISIDLE': # Idle time. Some servers (at least, inspircd) also have # signon time as unixtime. 
idle = timedelta(seconds=int(message.args[2])) elif message.command == 'RPL_WHOISCHANNELS': # TODO split and parse out the usermodes # TODO don't some servers have an extension with multiple modes # here channels = message.args[2] elif message.command == 'RPL_WHOISSERVER': server = message.args[2] server_desc = message.args[3] return messages @asyncio.coroutine def say(self, message, target, notice=False): """Coroutine that sends a message to a target, which may be either a `Channel` or a `Peer`. """ command = 'NOTICE' if notice else 'PRIVMSG' self.send_message(command, target, message) @asyncio.coroutine def join(self, channel_name, key=None): """Coroutine that joins a channel, and nonblocks until the join is "synchronized" (defined as receiving the nick list). """ if channel_name in self._join_futures: return self._join_futures[channel_name] # TODO multiple? error on commas? if key is None: self.send_message('JOIN', channel_name) else: self.send_message('JOIN', channel_name, key) # Clear out any lingering names list self._pending_names[channel_name] = [] # Return a Future, to be populated by the message loop fut = self._join_futures[channel_name] = asyncio.Future() return fut @asyncio.coroutine def names(self, channel_name): """Coroutine that returns a list of names in a channel.""" self.send_message('NAMES', channel_name) # No need to do the same thing twice if channel_name in self._names_futures: return self._names_futures[channel_name] # Clear out any lingering names list self._pending_names[channel_name] = [] # Return a Future, to be populated by the message loop fut = self._names_futures[channel_name] = asyncio.Future() return fut def set_topic(self, channel, topic): """Sets the channel topic.""" self.send_message('TOPIC', channel, topic) # TODO unclear whether this stuff should be separate or what; it's less # about the protocol and more about the dywypi interface def send_message(self, command, *args): message = IRCMessage(command, *args) log.debug("sent: 
%r", message) self._writer.write(message.render().encode(self.charset) + b'\r\n') def format_transition(self, current_style, new_style): if new_style == Style.default(): # Reset code, ^O return '\x0f' if new_style.fg != current_style.fg and new_style.fg is Color.default: # IRC has no "reset to default" code. mIRC claims color 99 is for # this, but it lies, at least in irssi. So we must reset and # reapply everything. ret = '\x0f' if new_style.bold is Bold.on: ret += '\x02' return ret ret = '' if new_style.fg != current_style.fg: ret += FOREGROUND_CODES[new_style.fg] if new_style.bold != current_style.bold: # There's no on/off for bold, just a toggle ret += '\x02' return ret