class MagicEmotiv:
    def __init__(self, ptr, upd_interval):
        self.data_to_send = Queue()
        self.battery = 40
        self.packets = Queue()
        self.ptr = ptr
        self.poll_interval = upd_interval
        # read_data()/update_console() loop on this flag; without it they
        # would raise AttributeError on first use.
        self.running = True

    def set_filter(self, value):
        self.poll_interval = 1 / value

    async def setup(self):
        print("creating magic emotiv...")

    async def read_data(self):
        while self.running:
            s = {}
            for shift, sensor in enumerate(sorted(sensor_bits, reverse=True)):
                s[sensor] = {'quality': 0.0}
                s[sensor]['value'] = np.random.normal() + shift * 5
            packet = MagicPacket(
                b'Py2\x18\xe7\xb7\xdf\x8e\x86n;g\xbd\xc0\x00\x00\x02\x11(!`' +
                b'=\x80\x15\xecX\xc6 \xd9ii\x9e',
                s, False)
            self.packets.put_nowait(packet)
            self.data_to_send.put_nowait(packet)
            self.ptr += 1
            await asyncio.sleep(self.poll_interval)

    async def update_console(self):
        while self.running:
            packet = await self.packets.get()
            print(packet)
            await asyncio.sleep(self.poll_interval)
class Waiter(BaseHandler):

    """
    The Waiter handler allows an event handler to block until a
    particular stanza has been received. The handler will either be
    given the matched stanza, or ``False`` if the waiter has timed out.

    :param string name: The name of the handler.
    :param matcher: A :class:`~slixmpp.xmlstream.matcher.base.MatcherBase`
                    derived object for matching stanza objects.
    :param stream: The :class:`~slixmpp.xmlstream.xmlstream.XMLStream`
                   instance this handler should monitor.
    """

    def __init__(self, name, matcher, stream=None):
        BaseHandler.__init__(self, name, matcher, stream=stream)
        self._payload = Queue()

    def prerun(self, payload):
        """Store the matched stanza when received during processing.

        :param payload: The matched
            :class:`~slixmpp.xmlstream.stanzabase.ElementBase` object.
        """
        self._payload.put_nowait(payload)

    def run(self, payload):
        """Do not process this handler during the main event loop."""
        pass

    @asyncio.coroutine
    def wait(self, timeout=None):
        """Block an event handler while waiting for a stanza to arrive.

        Be aware that this will impact performance if called from a
        non-threaded event handler.

        Will return either the received stanza, or ``False`` if the
        waiter timed out.

        :param int timeout: The number of seconds to wait for the stanza
            to arrive. Defaults to the stream's
            :class:`~slixmpp.xmlstream.xmlstream.XMLStream.response_timeout`
            value.
        """
        if timeout is None:
            timeout = slixmpp.xmlstream.RESPONSE_TIMEOUT

        stanza = None
        try:
            # Without wait_for() the timeout parameter would never take
            # effect, since Queue.get() does not raise TimeoutError itself.
            stanza = yield from asyncio.wait_for(self._payload.get(), timeout)
        except asyncio.TimeoutError:
            log.warning("Timed out waiting for %s", self.name)
        self.stream().remove_handler(self.name)
        return stanza

    def check_delete(self):
        """Always remove waiters after use."""
        return True
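# A minimal, self-contained sketch (standard library only, not part of
# slixmpp) of the pattern Waiter.wait() relies on: block on a Queue with a
# deadline and fall back to a sentinel when nothing arrives in time.
import asyncio


async def wait_with_timeout(queue: asyncio.Queue, timeout: float):
    try:
        return await asyncio.wait_for(queue.get(), timeout)
    except asyncio.TimeoutError:
        return False  # mirrors Waiter's documented timed-out result

# asyncio.run(wait_with_timeout(asyncio.Queue(), 0.1))  # -> False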
class Message(ws.WS):

    def __init__(self, loop):
        self.queue = Queue(loop=loop)

    def get(self):
        return self.queue.get()

    def on_message(self, websocket, message):
        self.queue.put_nowait(message)
class Listener:

    def __init__(self):
        self._messages = Queue()

    def __call__(self, channel, message):
        self._messages.put_nowait((channel, message))

    def get(self):
        return self._messages.get()
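# A minimal sketch (not from the original source) showing how the Listener
# above bridges a synchronous pub/sub callback into async code: the callback
# enqueues without blocking, and a coroutine awaits get().
import asyncio


async def demo_listener():
    listener = Listener()
    listener("news", "hello")            # synchronous callback side
    channel, message = await listener.get()
    print(channel, message)              # -> news hello

# asyncio.run(demo_listener())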
class ConnectionPool:

    def __init__(self):
        self._config_dict = None
        self._queue = Queue()
        self._outstanding_connections = WeakSet()

    async def get_conn(self):
        self._check_config()
        try:
            while True:
                conn = self._queue.get_nowait()
                if conn.is_open():
                    break
                try:
                    await conn.close()
                except Exception:
                    l.debug('Exception in close rethink connection',
                            exc_info=True)
        except QueueEmpty:
            conn = await r.connect(**self._config_dict)
        self._outstanding_connections.add(conn)
        return conn

    async def put_conn(self, conn):
        self._queue.put_nowait(conn)
        self._outstanding_connections.remove(conn)

    def set_config(self, config):
        self._config_dict = config

    def get_config(self):
        self._check_config()
        return self._config_dict

    async def teardown(self):
        while True:
            try:
                conn = self._queue.get_nowait()
            except QueueEmpty:
                break
            self._outstanding_connections.add(conn)
        for conn in self._outstanding_connections:
            try:
                await conn.close()
            except Exception:
                l.debug('Exception in close rethink connection',
                        exc_info=True)

    def _check_config(self):
        assert self._config_dict is not None, \
            "Did you remember to run resync.setup()?"
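# A hedged usage sketch for the pool above. It assumes a reachable RethinkDB
# and that set_config() receives the same kwargs rethinkdb's r.connect()
# expects; the host/port values are illustrative, not from the original.
async def pool_demo():
    pool = ConnectionPool()
    pool.set_config({"host": "localhost", "port": 28015})  # assumed config
    conn = await pool.get_conn()
    try:
        pass  # run queries against conn here
    finally:
        await pool.put_conn(conn)   # return it for reuse
    await pool.teardown()           # close everything at shutdown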
class ThreadedProcessReader:

    def __init__(self, in_stream: TextIO):
        self._input = in_stream
        self._queue = Queue(100)
        self._thread = Thread(target=self._entry, daemon=True)
        self._thread.start()
        self._closed = False

    def _entry(self):
        try:
            for line in iter(self._input.readline, ""):
                try:
                    self._queue.put_nowait(line)
                except QueueFull:
                    pass
            self._input.close()
        except ValueError:
            pass
        try:
            self._queue.put_nowait("")
        except QueueFull:
            pass

    def wait_until_closed(self):
        self._input.close()
        self._thread.join(5000)

    async def poll(self) -> Optional[str]:
        if self._closed:
            return None
        val = await self._queue.get()
        if not len(val):
            self._closed = True
        return None if self._closed else val

    def poll_all(self):
        try:
            val = self._queue.get_nowait()
            while val is not None:
                yield val
                val = self._queue.get_nowait()
        except QueueEmpty:
            pass
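# A minimal sketch (not in the original) wiring the reader to a child
# process's stdout and draining it from a coroutine. Assumption: the Queue
# above is asyncio.Queue, since poll() awaits get(); note that calling
# put_nowait() from the reader thread is not strictly thread-safe.
import subprocess
import sys


async def reader_demo():
    proc = subprocess.Popen(
        [sys.executable, "-c", "print('one'); print('two')"],
        stdout=subprocess.PIPE, text=True)
    reader = ThreadedProcessReader(proc.stdout)
    while True:
        line = await reader.poll()
        if line is None:        # empty-string sentinel -> stream closed
            break
        print(line.rstrip())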
class MockConnection:
    __slots__ = Connection.__slots__

    @property
    def completed_queries(self):
        return completed_queries

    @property
    def results(self):
        return results

    @results.setter
    def results(self, result):
        global results
        results = result

    def set_database_results(self, *dbresults):
        self.results = Queue()
        for result in dbresults:
            self.results.put_nowait(result)

    async def general_query(self, query, *args, **kwargs):
        completed_queries.append((query, *args, kwargs))
        return results.get_nowait()

    execute = fetch = fetchval = fetchrow = general_query

    async def prepare(self, query, *, timeout=None):
        return MockPreparedStatement(self, query, None)

    async def close(self):
        pass

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        pass

    def __await__(self):
        async def get_conn():
            return self
        # __await__ must return an iterator, not a coroutine object,
        # otherwise `await conn` raises TypeError.
        return get_conn().__await__()
async def listen_for_order_book_snapshots(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    while True:
        trading_pairs: List[str] = await self.get_trading_pairs()
        try:
            async with aiohttp.ClientSession() as client:
                for trading_pair in trading_pairs:
                    try:
                        snapshot: Dict[str, Any] = await self.get_snapshot(client, trading_pair)
                        snapshot_timestamp: float = time.time()
                        snapshot_msg: OrderBookMessage = BitfinexOrderBook.snapshot_message_from_exchange(
                            snapshot,
                            snapshot_timestamp
                        )
                        output.put_nowait(snapshot_msg)
                        self.logger().debug(f"Saved order book snapshot for {trading_pair}")
                        await asyncio.sleep(self.TIME_SLEEP_BETWEEN_REQUESTS)
                    except asyncio.CancelledError:
                        raise
                    except Exception as err:
                        self.logger().error(f"Listening snapshots: {err}")
                        self.logger().network(
                            "Unexpected error with HTTP connection.",
                            exc_info=True,
                            app_warning_msg="Unexpected error with HTTP connection. "
                                            f"Retrying in {self.TIME_SLEEP_BETWEEN_REQUESTS} sec. "
                                            "Check network connection."
                        )
                        await asyncio.sleep(self.TIME_SLEEP_BETWEEN_REQUESTS)
                this_hour: pd.Timestamp = pd.Timestamp.utcnow().replace(
                    minute=0, second=0, microsecond=0
                )
                next_hour: pd.Timestamp = this_hour + pd.Timedelta(hours=1)
                delta: float = next_hour.timestamp() - time.time()
                await asyncio.sleep(delta)
        except asyncio.CancelledError:
            raise
        except Exception as err:
            self.logger().error(f"Listening snapshots: {err}")
            self.logger().error("Unexpected error", exc_info=True)
            await asyncio.sleep(self.TIME_SLEEP_BETWEEN_REQUESTS)
async def listen_for_user_stream(self, output: asyncio.Queue):
    ws = None
    while True:
        try:
            async with self._throttler.execute_task(CONSTANTS.WS_CONNECTION_LIMIT_ID):
                ws: WSAssistant = await self._api_factory.get_ws_assistant()
                await ws.connect(ws_url=CONSTANTS.WS_AUTH_URL, ping_timeout=PING_TIMEOUT)

            if self._current_auth_token is None:
                self._current_auth_token = await self.get_auth_token()

            for subscription_type in ["openOrders", "ownTrades"]:
                subscribe_request: WSRequest = WSRequest({
                    "event": "subscribe",
                    "subscription": {
                        "name": subscription_type,
                        "token": self._current_auth_token
                    }
                })
                await ws.send(subscribe_request)

            async for ws_response in ws.iter_messages():
                msg = ws_response.data
                if not (type(msg) is dict and "event" in msg.keys()
                        and msg["event"] in ["heartbeat", "systemStatus", "subscriptionStatus"]):
                    output.put_nowait(msg)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with Kraken WebSocket connection. "
                "Retrying after 30 seconds...", exc_info=True)
            self._current_auth_token = None
            await asyncio.sleep(30.0)
        finally:
            if ws is not None:
                await ws.disconnect()
async def listen_for_order_book_stream(self,
                                       ev_loop: asyncio.BaseEventLoop,
                                       snapshot_queue: asyncio.Queue,
                                       diff_queue: asyncio.Queue):
    while True:
        connection, hub = await self.websocket_connection()
        try:
            async for raw_message in self._socket_stream():
                decoded: Dict[str, Any] = await self._transform_raw_message(raw_message)

                trading_pair: str = decoded["results"].get("M")
                if not trading_pair:  # Ignores any other websocket response messages
                    continue

                # Processes snapshot messages
                if decoded["type"] == "snapshot":
                    snapshot: Dict[str, Any] = decoded
                    snapshot_timestamp = snapshot["nonce"]
                    snapshot_msg: OrderBookMessage = BittrexOrderBook.snapshot_message_from_exchange(
                        snapshot["results"], snapshot_timestamp)
                    snapshot_queue.put_nowait(snapshot_msg)
                    self._snapshot_msg[trading_pair] = {
                        "timestamp": int(time.time()),
                        "content": snapshot_msg
                    }

                # Processes diff messages
                if decoded["type"] == "update":
                    diff: Dict[str, Any] = decoded
                    diff_timestamp = diff["nonce"]
                    diff_msg: OrderBookMessage = BittrexOrderBook.diff_message_from_exchange(
                        diff["results"], diff_timestamp)
                    diff_queue.put_nowait(diff_msg)
        except Exception:
            self.logger().error("Unexpected error when listening on socket stream.",
                                exc_info=True)
        finally:
            connection.close()
            self._websocket_connection = self._websocket_hub = None
            self.logger().info("Reinitializing websocket connection...")
async def listen_for_order_book_snapshots(self,
                                          ev_loop: asyncio.BaseEventLoop,
                                          output: asyncio.Queue):
    """
    Listen for orderbook snapshots by fetching orderbook
    """
    while True:
        try:
            for trading_pair in self._trading_pairs:
                try:
                    snapshot: Dict[str, Any] = await self.get_order_book_data(trading_pair)
                    snapshot_timestamp: int = str_date_to_ts(snapshot["timestamp"])
                    snapshot_msg: OrderBookMessage = HitbtcOrderBook.snapshot_message_from_exchange(
                        snapshot,
                        snapshot_timestamp,
                        metadata={"trading_pair": trading_pair})
                    output.put_nowait(snapshot_msg)
                    self.logger().debug(f"Saved order book snapshot for {trading_pair}")
                    # Be careful not to go above API rate limits.
                    await asyncio.sleep(5.0)
                except asyncio.CancelledError:
                    raise
                except Exception:
                    self.logger().network(
                        "Unexpected error with WebSocket connection.",
                        exc_info=True,
                        app_warning_msg="Unexpected error with WebSocket connection. "
                                        "Retrying in 5 seconds. Check network connection.")
                    await asyncio.sleep(5.0)
            this_hour: pd.Timestamp = pd.Timestamp.utcnow().replace(minute=0, second=0, microsecond=0)
            next_hour: pd.Timestamp = this_hour + pd.Timedelta(hours=1)
            delta: float = next_hour.timestamp() - time.time()
            await asyncio.sleep(delta)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error("Unexpected error.", exc_info=True)
            await asyncio.sleep(5.0)
async def listen_for_user_stream(self, output: asyncio.Queue):
    """
    *required
    Subscribe to user stream via web socket, and keep the connection open for incoming messages

    :param output: an async queue where the incoming messages are stored
    """
    while True:
        try:
            self._ws_assistant = await self._web_assistants_factory.get_ws_assistant()
            await self._ws_assistant.connect(CONSTANTS.WS_URL,
                                             message_timeout=CONSTANTS.WS_MESSAGE_TIMEOUT)
            subscribe_payload: Dict[str, Any] = {
                "type": "subscribe",
                "product_ids": self._trading_pairs,
                "channels": [CONSTANTS.USER_CHANNEL_NAME]
            }
            subscribe_request = WSJSONRequest(payload=subscribe_payload, is_auth_required=True)
            await self._ws_assistant.subscribe(subscribe_request)
            async for msg in self._iter_messages(self._ws_assistant):
                msg_type: str = msg.get("type", None)
                if msg_type is None:
                    raise ValueError(f"Coinbase Pro Websocket message does not contain a type - {msg}")
                elif msg_type == "error":
                    raise ValueError(f"Coinbase Pro Websocket received error message - {msg['message']}")
                elif msg_type in ["open", "match", "change", "done"]:
                    output.put_nowait(msg)
                elif msg_type in ["received", "activate", "subscriptions"]:
                    # these messages are not needed to track the order book
                    pass
                else:
                    raise ValueError(f"Unrecognized Coinbase Pro Websocket message received - {msg}")
        except asyncio.CancelledError:
            self._ws_assistant = None
            raise
        except Exception:
            self._ws_assistant = None
            self.logger().network(
                "Unexpected error with WebSocket connection.",
                exc_info=True,
                app_warning_msg="Unexpected error with WebSocket connection."
                                f" Retrying in {CONSTANTS.REST_API_LIMIT_COOLDOWN} seconds."
                                " Check network connection."
            )
            await self._sleep(CONSTANTS.REST_API_LIMIT_COOLDOWN)
async def listen_for_user_stream(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    """
    *required
    Subscribe to user stream via web socket, and keep the connection open for incoming messages
    :param ev_loop: ev_loop to execute this function in
    :param output: an async queue where the incoming messages are stored
    """
    while True:
        try:
            async with websockets.connect(BeaxyConstants.TradingApi.WS_BASE_URL) as ws:
                ws: websockets.WebSocketClientProtocol = ws

                connect_request = BeaxyStompMessage("CONNECT")
                connect_request.headers = await self._beaxy_auth.generate_ws_auth_dict()
                await ws.send(connect_request.serialize())

                orders_sub_request = BeaxyStompMessage("SUBSCRIBE")
                orders_sub_request.headers["id"] = f"sub-humming-{get_tracking_nonce()}"
                orders_sub_request.headers["destination"] = "/user/v1/orders"
                orders_sub_request.headers["X-Deltix-Nonce"] = str(get_tracking_nonce())
                await ws.send(orders_sub_request.serialize())

                async for raw_msg in self._inner_messages(ws):
                    stomp_message = BeaxyStompMessage.deserialize(raw_msg)
                    if stomp_message.has_error():
                        raise Exception(f"Got error from ws. Headers - {stomp_message.headers}")

                    msg = ujson.loads(stomp_message.body)
                    output.put_nowait(msg)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with Beaxy connection. Retrying after 30 seconds...",
                exc_info=True)
            await asyncio.sleep(30.0)
async def listen_for_order_book_diffs(self,
                                      ev_loop: Optional[asyncio.BaseEventLoop],
                                      output: asyncio.Queue):
    """Fetches or Subscribes to the order book snapshots for each trading pair.
    Additionally, parses the incoming message into a OrderBookMessage and appends
    it into the output Queue.
    """
    while True:
        try:
            trading_pairs: List[str] = await self.get_trading_pairs()
            async with websockets.connect(OKEX_WS_URI_PUBLIC) as ws:
                ws: websockets.WebSocketClientProtocol = ws
                for trading_pair in trading_pairs:
                    subscribe_request: Dict[str, Any] = {
                        "op": "subscribe",
                        "args": [{
                            "channel": "books",
                            "instId": trading_pair
                        }]
                    }
                    await ws.send(json.dumps(subscribe_request))

                async for raw_msg in self._inner_messages(ws):
                    decoded_msg: str = raw_msg

                    if '"event":"subscribe"' in decoded_msg:
                        self.logger().debug(f"Subscribed to channel, full message: {decoded_msg}")
                    elif '"action":"update"' in decoded_msg:
                        msg = json.loads(decoded_msg)
                        for data in msg['data']:
                            order_book_message: OrderBookMessage = OkexOrderBook.diff_message_from_exchange(
                                data, int(data['ts']), msg['arg'])
                            output.put_nowait(order_book_message)
                    else:
                        self.logger().debug(f"Unrecognized message received from OKEx websocket: {decoded_msg}")
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with WebSocket connection. Retrying after 30 seconds...",
                exc_info=True)
            await asyncio.sleep(30.0)
async def listen_for_user_stream(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    while True:
        try:
            ws_key: str = await get_ws_api_key()
            async with websockets.connect(f"{LOOPRING_WS_URL}?wsApiKey={ws_key}") as ws:
                ws: websockets.WebSocketClientProtocol = ws
                topics = [{"topic": "order", "market": m}
                          for m in self._orderbook_tracker_data_source.trading_pairs]
                topics.append({"topic": "account"})
                subscribe_request: Dict[str, Any] = {
                    "op": "sub",
                    "apiKey": self._loopring_auth.generate_auth_dict()["X-API-KEY"],
                    "unsubscribeAll": True,
                    "topics": topics
                }
                await ws.send(ujson.dumps(subscribe_request))
                async for raw_msg in self._inner_messages(ws):
                    self._last_recv_time = time.time()
                    diff_msg = ujson.loads(raw_msg)
                    if 'op' in diff_msg:
                        # These messages are for control of the stream, so skip sending them to the market class
                        continue
                    output.put_nowait(diff_msg)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with Loopring WebSocket connection. "
                "Retrying after 30 seconds...", exc_info=True)
            await asyncio.sleep(30.0)
class Client:

    def __init__(self):
        self._uri = f"ws://{HOST}:{PORT}"
        self._txq = Queue()
        self._rxq = Queue()
        self._running = False  # set True in run(); lets quit() be safe before run()

    def quit(self):
        self._running = False
        self._txq.put_nowait(dumps("Stop"))
        self._rxq.put_nowait(dumps("Stop"))

    async def send(self, message):
        await self._txq.put(dumps(message))

    async def recv(self):
        message = await self._rxq.get()
        return loads(message)

    @property
    def can_recv(self):
        return not self._rxq.empty()

    async def _consumer_handler(self, websocket):
        while self._running:
            message = await websocket.recv()
            await self._rxq.put(message)

    async def _producer_handler(self, websocket):
        while self._running:
            message = await self._txq.get()
            await websocket.send(message)

    async def run(self):
        self._running = True
        async with connect(self._uri) as websocket:
            _, pending = await wait(
                [
                    create_task(self._consumer_handler(websocket)),
                    create_task(self._producer_handler(websocket))
                ],
                return_when=FIRST_COMPLETED,
            )
            for task in pending:
                task.cancel()
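# A hedged usage sketch (HOST/PORT and a reachable server are assumed, as in
# the original module): drive run() as a background task, then exchange
# messages through the two queues.
async def client_demo():
    client = Client()
    runner = create_task(client.run())
    await client.send({"hello": "world"})
    reply = await client.recv()      # blocks until the server answers
    print(reply)
    client.quit()
    await runner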
async def listen_for_order_book_diffs(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    while True:
        try:
            trading_pairs: List[str] = self._trading_pairs
            async with websockets.connect(self._api_ws) as ws:
                ws: websockets.WebSocketClientProtocol = ws
                if not self._motd_done:
                    try:
                        raw_msg = await asyncio.wait_for(ws.recv(), timeout=self.MESSAGE_TIMEOUT)
                        msg = ujson.loads(raw_msg)
                        # Print MOTD and announcements if present
                        if "motd" in msg:
                            self._motd_done = True
                            self.logger().info(f"Bamboo Relay API MOTD: {msg['motd']}")
                            if "announcements" in msg and len(msg["announcements"]):
                                for announcement in msg["announcements"]:
                                    self.logger().info(f"Announcement: {announcement}")
                    except Exception:
                        pass
                for trading_pair in trading_pairs:
                    request: Dict[str, str] = {
                        "type": "SUBSCRIBE",
                        "topic": "BOOK",
                        "market": trading_pair,
                        "networkId": self._network_id
                    }
                    await ws.send(ujson.dumps(request))
                async for raw_msg in self._inner_messages(ws):
                    # Try here, else any errors cause the websocket to disconnect
                    try:
                        msg = ujson.loads(raw_msg)
                        # Valid Diff messages from BambooRelay have actions array
                        if "actions" in msg:
                            diff_msg: BambooRelayOrderBookMessage = BambooRelayOrderBook.diff_message_from_exchange(
                                msg, time.time())
                            output.put_nowait(diff_msg)
                    except Exception:
                        pass
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error("Unexpected error with WebSocket connection. Retrying after 30 seconds...",
                                exc_info=True)
            await asyncio.sleep(30.0)
async def listen_for_trades(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    while True:
        try:
            trading_pairs = ",".join(list(
                map(lambda trading_pair: convert_to_exchange_trading_pair(trading_pair),
                    self._trading_pairs)))
            payload = {
                "op": CONSTANTS.SUB_ENDPOINT_NAME,
                "ch": f"trades:{trading_pairs}"
            }
            async with websockets.connect(CONSTANTS.WS_URL) as ws:
                ws: websockets.WebSocketClientProtocol = ws
                async with self._throttler.execute_task(CONSTANTS.SUB_ENDPOINT_NAME):
                    await ws.send(ujson.dumps(payload))

                async for raw_msg in self._inner_messages(ws):
                    msg = ujson.loads(raw_msg)
                    if msg is None or msg.get("m") != "trades":
                        continue

                    trading_pair: str = convert_from_exchange_trading_pair(msg.get("symbol"))
                    for trade in msg.get("data"):
                        trade_timestamp: int = trade.get("ts")
                        trade_msg: OrderBookMessage = AscendExOrderBook.trade_message_from_exchange(
                            trade,
                            trade_timestamp,
                            metadata={"trading_pair": trading_pair})
                        output.put_nowait(trade_msg)
        except asyncio.CancelledError:
            raise
        except Exception as e:
            self.logger().debug(str(e))
            self.logger().error(
                "Unexpected error with WebSocket connection. Retrying after 30 seconds...",
                exc_info=True)
            await asyncio.sleep(30.0)
async def listen_for_order_book_diffs(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    while True:
        try:
            ws_message: str = await self.get_ws_subscription_message("book")
            async with self._throttler.execute_task(CONSTANTS.WS_CONNECTION_LIMIT_ID):
                async with websockets.connect(CONSTANTS.WS_URL) as ws:
                    ws: websockets.WebSocketClientProtocol = ws
                    await ws.send(ws_message)
                    async for raw_msg in self._inner_messages(ws):
                        msg = ujson.loads(raw_msg)
                        msg_dict = {
                            "trading_pair": convert_from_exchange_trading_pair(msg[-1]),
                            "asks": msg[1].get("a", []) or msg[1].get("as", []) or [],
                            "bids": msg[1].get("b", []) or msg[1].get("bs", []) or []
                        }
                        msg_dict["update_id"] = max(
                            [*map(lambda x: float(x[2]),
                                  msg_dict["bids"] + msg_dict["asks"])],
                            default=0.)
                        if "as" in msg[1] and "bs" in msg[1]:
                            order_book_message: OrderBookMessage = (
                                KrakenOrderBook.snapshot_ws_message_from_exchange(
                                    msg_dict, time.time()))
                        else:
                            order_book_message: OrderBookMessage = KrakenOrderBook.diff_message_from_exchange(
                                msg_dict, time.time())
                        output.put_nowait(order_book_message)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with WebSocket connection. Retrying after 30 seconds...",
                exc_info=True)
            await asyncio.sleep(30.0)
async def listen_for_user_stream(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    while True:
        try:
            self._websocket_connection = signalr_aio.Connection(BITTREX_WS_FEED, session=None)
            hub = self._websocket_connection.register_hub("c2")

            self.logger().info("Invoked GetAuthContext")
            hub.server.invoke("GetAuthContext", self._bittrex_auth.api_key)
            self._websocket_connection.start()

            async for raw_message in self._socket_user_stream(self._websocket_connection):
                decode: Dict[str, Any] = self._transform_raw_message(raw_message)

                if decode.get("error") is not None:
                    self.logger().error(decode["error"])
                    continue

                if decode.get("content") is not None:
                    signature = decode["content"].get("signature")
                    content_type = decode["event_type"]
                    if signature is not None:
                        hub.server.invoke("Authenticate", self._bittrex_auth.api_key, signature)
                        continue
                    if content_type in ["uO", "uB"]:  # uB: Balance Delta, uO: Order Delta
                        order_delta: OrderBookMessage = self.order_book_class.diff_message_from_exchange(decode)
                        output.put_nowait(order_delta)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with Bittrex WebSocket connection. "
                "Retrying after 30 seconds...", exc_info=True)
            await asyncio.sleep(30.0)
async def listen_for_order_book_snapshots(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    """
    *required
    Fetches order book snapshots for each trading pair, and use them to update the local order book
    :param ev_loop: ev_loop to execute this function in
    :param output: an async queue where the incoming messages are stored
    """
    while True:
        try:
            trading_pairs: List[str] = self._trading_pairs
            rest_assistant = await self._get_rest_assistant()
            for trading_pair in trading_pairs:
                try:
                    snapshot: Dict[str, Any] = await self.get_snapshot(rest_assistant, trading_pair)
                    snapshot_timestamp: float = time.time()
                    snapshot_msg: OrderBookMessage = CoinbaseProOrderBook.snapshot_message_from_exchange(
                        snapshot,
                        snapshot_timestamp,
                        metadata={"product_id": trading_pair}
                    )
                    output.put_nowait(snapshot_msg)
                    self.logger().debug(f"Saved order book snapshot for {trading_pair}")
                    # Be careful not to go above API rate limits.
                    await self._sleep(CONSTANTS.REST_API_LIMIT_COOLDOWN)
                except asyncio.CancelledError:
                    raise
                except Exception:
                    self.logger().network(
                        "Unexpected error with WebSocket connection.",
                        exc_info=True,
                        app_warning_msg="Unexpected error with WebSocket connection."
                                        f" Retrying in {CONSTANTS.REST_API_LIMIT_COOLDOWN} seconds."
                                        " Check network connection."
                    )
                    await self._sleep(CONSTANTS.REST_API_LIMIT_COOLDOWN)
            this_hour: pd.Timestamp = pd.Timestamp.utcnow().replace(minute=0, second=0, microsecond=0)
            next_hour: pd.Timestamp = this_hour + pd.Timedelta(hours=1)
            delta: float = next_hour.timestamp() - time.time()
            await self._sleep(delta)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error("Unexpected error.", exc_info=True)
            await self._sleep(CONSTANTS.REST_API_LIMIT_COOLDOWN)
class DbQueue:
    """Database change queue."""
    _loop: AbstractEventLoop
    _queue: Queue[Union[_DbRequest, Future[None]]]

    def __init__(self) -> None:
        self._loop = get_event_loop()
        self._queue = Queue()

    def queue_write(self, callback: Optional[Callable[[], Awaitable[bool]]],
                    sql: str, params: List[Any]) -> None:
        """Queues a write operation to database.

        The callback is executed immediately before the write would be
        sent to database. Returning false discards the write.
        """
        self._queue.put_nowait(_DbRequest(callback, sql, params))

    def wait_for_writes(self) -> Future[None]:
        """Creates a future that will complete after current writes.

        By awaiting on this, the caller can make sure that the writes
        issued before this have been completed before e.g. SELECTing from
        database. Note that writes issued after the call to this may also
        have been completed.
        """
        fut = self._loop.create_future()
        self._queue.put_nowait(fut)
        return fut

    async def process_queue(self, conn: Connection) -> None:
        """Processes the write queue.

        This never returns, use asyncio.create_task().
        """
        while True:
            entry = await self._queue.get()
            if isinstance(entry, _DbRequest):
                # Execute callback if it exists
                if entry.callback is None or await entry.callback():
                    # If callback did not exist or returned True,
                    # proceed to execute the SQL write
                    await conn.execute(entry.sql, *entry.params)
            else:
                # Just complete futures once we reach them
                entry.set_result(None)
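# A hedged sketch of the read-after-write pattern the queue enables; the
# table and SQL are illustrative only, and Connection is assumed to be
# asyncpg-like (matching the conn.execute(sql, *params) call above).
async def db_queue_demo(db: DbQueue) -> None:
    db.queue_write(None, "INSERT INTO events (name) VALUES ($1)", ["login"])
    await db.wait_for_writes()   # barrier: the INSERT above is now flushed
    # safe to SELECT and observe the write here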
async def listen_for_order_book_diffs(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    while True:
        try:
            ws_path: str = "/".join([f"{trading_pair.lower()}@depth"
                                     for trading_pair in self._trading_pairs])
            stream_url: str = f"{DIFF_STREAM_URL}/{ws_path}"

            async with websockets.connect(stream_url) as ws:
                ws: websockets.WebSocketClientProtocol = ws
                async for raw_msg in self._inner_messages(ws):
                    msg = ujson.loads(raw_msg)
                    order_book_message: OrderBookMessage = BinanceOrderBook.diff_message_from_exchange(
                        msg, time.time())
                    output.put_nowait(order_book_message)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error("Unexpected error with WebSocket connection. Retrying after 30 seconds...",
                                exc_info=True)
            await asyncio.sleep(30.0)
async def listen_for_trades(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    ws = None
    while True:
        try:
            ws = await self._create_websocket_connection()
            streams = [wazirx_utils.convert_to_exchange_trading_pair(pair) + "@trades"
                       for pair in self._trading_pairs]
            subscribe_request: Dict[str, Any] = {
                "event": "subscribe",
                "streams": streams
            }
            await ws.send_json(subscribe_request)

            async for json_msg in self._iter_messages(ws):
                if "stream" in json_msg:
                    if "@trades" in json_msg["stream"]:
                        for trade in json_msg["data"]["trades"]:
                            trade: Dict[str, Any] = trade
                            trade_timestamp: int = ms_timestamp_to_s(trade["E"])
                            trade_msg: OrderBookMessage = WazirxOrderBook.trade_message_from_exchange(
                                trade,
                                trade_timestamp,
                                metadata={
                                    "trading_pair": wazirx_utils.convert_from_exchange_trading_pair(trade["s"])
                                })
                            output.put_nowait(trade_msg)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with WebSocket connection. Retrying after 30 seconds...",
                exc_info=True)
        finally:
            ws and await ws.close()
            await self._sleep(30.0)
async def listen_for_trades(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    """
    Listen for trades using websocket trade channel
    """
    ws = None  # guard so the finally clause is safe if construction fails
    while True:
        try:
            ws = HitbtcWebsocket()
            await ws.connect()

            for pair in self._trading_pairs:
                symbol = await HitbtcAPIOrderBookDataSource.exchange_symbol_associated_to_pair(pair)
                await ws.subscribe(Constants.WS_SUB["TRADES"], symbol)

            async for response in ws.on_message():
                method: str = response.get("method", None)
                trades_data: str = response.get("params", None)

                if trades_data is None or method != Constants.WS_METHODS['TRADES_UPDATE']:
                    continue

                pair: str = await self.trading_pair_associated_to_exchange_symbol(
                    response["params"]["symbol"])

                for trade in trades_data["data"]:
                    trade: Dict[str, Any] = trade
                    trade_timestamp: int = str_date_to_ts(trade["timestamp"])
                    trade_msg: OrderBookMessage = HitbtcOrderBook.trade_message_from_exchange(
                        trade,
                        trade_timestamp,
                        metadata={"trading_pair": pair})
                    output.put_nowait(trade_msg)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error("Unexpected error.", exc_info=True)
            await asyncio.sleep(5.0)
        finally:
            if ws is not None:
                await ws.disconnect()
async def listen_for_trades(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    while True:
        try:
            trading_pairs: List[str] = await self.get_trading_pairs()
            async with websockets.connect(HUOBI_WS_URI) as ws:
                ws: websockets.WebSocketClientProtocol = ws
                for trading_pair in trading_pairs:
                    subscribe_request: Dict[str, Any] = {
                        "sub": f"market.{trading_pair}.trade.detail",
                        "id": trading_pair
                    }
                    await ws.send(json.dumps(subscribe_request))
                async for raw_msg in self._inner_messages(ws):
                    # Huobi compresses their ws data
                    encoded_msg: bytes = gzip.decompress(raw_msg)
                    # Huobi's data value for id is a large int too big for ujson to parse
                    msg: Dict[str, Any] = json.loads(encoded_msg.decode('utf-8'))
                    if "ping" in msg:
                        await ws.send(f'{{"op":"pong","ts": {str(msg["ping"])}}}')
                    elif "subbed" in msg:
                        pass
                    elif "ch" in msg:
                        trading_pair = msg["ch"].split(".")[1]
                        for data in msg["tick"]["data"]:
                            trade_message: OrderBookMessage = HuobiOrderBook.trade_message_from_exchange(
                                data, metadata={"trading_pair": trading_pair})
                            output.put_nowait(trade_message)
                    else:
                        self.logger().debug(f"Unrecognized message received from Huobi websocket: {msg}")
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with WebSocket connection. Retrying after 30 seconds...",
                exc_info=True)
            await asyncio.sleep(30.0)
async def listen_for_user_stream(self, ev_loop: asyncio.BaseEventLoop,
                                 output: asyncio.Queue) -> AsyncIterable[Any]:
    """
    *required
    Subscribe to user stream via web socket, and keep the connection open for incoming messages
    :param ev_loop: ev_loop to execute this function in
    :param output: an async queue where the incoming messages are stored
    """
    while True:
        try:
            ws: websockets.WebSocketClientProtocol = await self._init_websocket_connection()
            self.logger().info("Authenticating to User Stream...")
            await self._authenticate(ws)
            self.logger().info("Successfully authenticated to User Stream.")
            await self._subscribe_to_channels(ws)
            self.logger().info("Successfully subscribed to all Private channels.")

            async for msg in self._inner_messages(ws):
                try:
                    msg = ujson.loads(msg)
                    if msg is None:
                        continue
                    output.put_nowait(msg)
                except Exception:
                    self.logger().error(
                        "Unexpected error when parsing BitMart user_stream message. ",
                        exc_info=True)
                    raise
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with BitMart WebSocket connection. Retrying after 30 seconds...",
                exc_info=True)
            if self._websocket_client is not None:
                await self._websocket_client.close()
                self._websocket_client = None
            await asyncio.sleep(30.0)
async def listen_for_order_book_snapshots(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    await self._get_tracking_pair_done_event.wait()
    while True:
        try:
            trading_pairs: List[str] = await self.get_trading_pairs()
            async with aiohttp.ClientSession() as client:
                for trading_pair in trading_pairs:
                    try:
                        snapshot: Dict[str, Any] = await self.get_snapshot(client, trading_pair)
                        snapshot_timestamp: float = time.time()
                        snapshot_msg: DDEXOrderBookMessage = DDEXOrderBook.snapshot_message_from_exchange(
                            snapshot,
                            snapshot_timestamp,
                            {"marketId": trading_pair}
                        )
                        output.put_nowait(snapshot_msg)
                        self.logger().debug(f"Saved order book snapshot for {trading_pair} at {snapshot_timestamp}")
                        await asyncio.sleep(5.0)
                    except asyncio.CancelledError:
                        raise
                    except IOError:
                        self.logger().network(
                            f"Error getting snapshot for {trading_pair}.",
                            exc_info=True,
                            app_warning_msg=f"Error getting snapshot for {trading_pair}. Check network connection."
                        )
                        await asyncio.sleep(5.0)
                    except Exception:
                        self.logger().error(f"Error processing snapshot for {trading_pair}.", exc_info=True)
                        await asyncio.sleep(5.0)
                this_hour: pd.Timestamp = pd.Timestamp.utcnow().replace(minute=0, second=0, microsecond=0)
                next_hour: pd.Timestamp = this_hour + pd.Timedelta(hours=1)
                delta: float = next_hour.timestamp() - time.time()
                await asyncio.sleep(delta)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().network(
                "Unexpected error listening for order book snapshot.",
                exc_info=True,
                app_warning_msg="Unexpected error listening for order book snapshot. Check network connection."
            )
            await asyncio.sleep(5.0)
async def _parse_trade_message(self, raw_message: Dict[str, Any], message_queue: asyncio.Queue):
    trade_data: Dict[str, Any] = raw_message["data"]
    timestamp: float = int(trade_data["time"]) * 1e-9
    trading_pair = await self._connector.trading_pair_associated_to_exchange_symbol(
        symbol=trade_data["symbol"])
    message_content = {
        "trade_id": trade_data["tradeId"],
        "update_id": trade_data["sequence"],
        "trading_pair": trading_pair,
        "trade_type": (float(TradeType.BUY.value) if trade_data["side"] == "buy"
                       else float(TradeType.SELL.value)),
        "amount": trade_data["size"],
        "price": trade_data["price"]
    }
    trade_message: Optional[OrderBookMessage] = OrderBookMessage(
        message_type=OrderBookMessageType.TRADE,
        content=message_content,
        timestamp=timestamp)
    message_queue.put_nowait(trade_message)
async def listen_for_user_stream(self, ev_loop: asyncio.AbstractEventLoop, output: asyncio.Queue):
    while True:
        try:
            self._websocket_connection = signalr_aio.Connection(BITTREX_WS_FEED, session=None)
            self.hub = self._websocket_connection.register_hub("c3")

            await self.authenticate()
            self.hub.server.invoke("Subscribe", ["heartbeat"])
            self.hub.server.invoke("Subscribe", ["order"])
            self.hub.server.invoke("Subscribe", ["balance"])
            self._websocket_connection.start()

            async for raw_message in self._socket_user_stream(self._websocket_connection):
                decode: Dict[str, Any] = self._transform_raw_message(raw_message)
                self.logger().debug(f"Got ws message {decode}")

                if decode.get("error") is not None:
                    self.logger().error(decode["error"])
                    continue

                content_type = decode.get("event_type")
                if content_type is not None:
                    if content_type in ["balance", "order"]:  # balance: Balance Delta, order: Order Delta
                        output.put_nowait(decode)
                    elif content_type == "re-authenticate":
                        await self.authenticate()
                    elif content_type == "heartbeat":
                        self.logger().debug("WS heartbeat")
                        continue
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with Bittrex WebSocket connection. "
                "Retrying after 30 seconds...", exc_info=True)
            await asyncio.sleep(30.0)
async def authorize(
    token,
    connection,
    status_queue: asyncio.Queue,
    watchdog_queue: asyncio.Queue,
):
    reader, writer = connection["reader"], connection["writer"]

    # Consume the server greeting before sending the token.
    data = await reader.readline()
    writer.write(f"{token}\n".encode())
    await writer.drain()

    # The server answers with JSON account info, or null for a bad token.
    data = await reader.readline()
    response_info = data.decode().strip()
    if not json.loads(response_info):
        raise TokenValidException("Invalid token. Check it or register a new one.")

    status_queue.put_nowait(
        NicknameReceived(json.loads(response_info)["nickname"]),
    )
    watchdog_queue.put_nowait("auth")
async def scan_computers(queue: asyncio.Queue):
    monitor = Monitor(NETWORK)
    loop = asyncio.get_running_loop()
    while True:
        with ThreadPoolExecutor(MAX_WORKERS) as pool:
            aws = [
                loop.run_in_executor(pool, Monitor.scan_host, monitor, host)
                for host in monitor.get_hosts(NUM_OF_HOSTS)
            ]
            for coro in asyncio.as_completed(aws):
                try:
                    computer = await coro
                except HostNotFound:
                    pass
                else:
                    queue.put_nowait(computer)
        print(f"Sleeping for {SLEEP_INTERVAL / 60} minutes...")
        await asyncio.sleep(SLEEP_INTERVAL)
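# A minimal consumer sketch (not part of the original source) for the queue
# that scan_computers() fills; it simply drains results as they arrive.
async def consume_computers(queue: asyncio.Queue):
    while True:
        computer = await queue.get()
        print(f"Found: {computer}")
        queue.task_done()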
async def listen_for_user_stream(self, ev_loop: asyncio.BaseEventLoop,
                                 output: asyncio.Queue) -> AsyncIterable[Any]:
    """
    *required
    Subscribe to user stream via web socket, and keep the connection open for incoming messages
    :param ev_loop: ev_loop to execute this function in
    :param output: an async queue where the incoming messages are stored
    """
    while True:
        try:
            async for msg in self._listen_to_orders_trades_balances():
                output.put_nowait(msg)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error(
                "Unexpected error with CryptoCom WebSocket connection. "
                "Retrying after 30 seconds...", exc_info=True
            )
            await asyncio.sleep(30.0)
async def listen_for_trades(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue):
    """
    Listen for trades using websocket "updateTrades" method
    """
    ws = None  # guard so the finally clause is safe if construction fails
    while True:
        try:
            ws = BitcoinComWebsocket()
            await ws.connect()

            trading_pairs: List[str] = await self.get_trading_pairs()
            for trading_pair in trading_pairs:
                await ws.subscribe(
                    "subscribeTrades",
                    {
                        "symbol": trading_pair,
                        "limit": 1  # we only care about updates, this sets the initial snapshot limit
                    })

            async for response in ws.on("updateTrades"):
                if response["error"] is not None:
                    self.logger().error(response["error"])
                    continue

                trades = response["data"]["data"]
                for trade in trades:
                    trade_timestamp: float = pd.Timestamp(trade["timestamp"]).timestamp()
                    trade_msg: OrderBookMessage = BitcoinComOrderBook.trade_message_from_exchange(
                        add_event_type(EventTypes.TradesUpdate, trade),
                        trade_timestamp,
                        metadata={"trading_pair": trading_pair})
                    output.put_nowait(trade_msg)
        except asyncio.CancelledError:
            raise
        except Exception:
            self.logger().error("Unexpected error.", exc_info=True)
            await asyncio.sleep(5.0)
        finally:
            if ws is not None:
                await ws.disconnect()
async def _process_event_message(self, event_message: Dict[str, Any], queue: asyncio.Queue):
    if event_message.get("error") is not None:
        err_msg = event_message.get("error", {}).get("message", event_message.get("error"))
        raise IOError({
            "label": "WSS_ERROR",
            "message": f"Error received via websocket - {err_msg}."
        })
    elif (event_message.get("event") == "update"
          and event_message.get("channel") in [
              CONSTANTS.USER_TRADES_ENDPOINT_NAME,
              CONSTANTS.USER_ORDERS_ENDPOINT_NAME,
              CONSTANTS.USER_BALANCE_ENDPOINT_NAME,
          ]):
        queue.put_nowait(event_message)
class Echo(WS):

    def __init__(self, loop=None):
        self.queue = Queue(loop=loop)

    def get(self):
        return self.queue.get()

    def on_message(self, ws, message):
        self.queue.put_nowait(message)

    def on_ping(self, ws, body):
        ws.pong(body)
        self.queue.put_nowait('PING: %s' % body.decode('utf-8'))

    def on_pong(self, ws, body):
        self.queue.put_nowait('PONG: %s' % body.decode('utf-8'))

    def on_close(self, ws):
        self.queue.put_nowait('CLOSE')
class AsyncioSubscriptionManager(SubscriptionManager):

    def __init__(self, pubnub_instance):
        subscription_manager = self

        self._message_worker = None
        self._message_queue = Queue()
        self._subscription_lock = Semaphore(1)
        self._subscribe_loop_task = None
        self._subscribe_request_task = None  # read by _stop_subscribe_loop() before the first loop run
        self._heartbeat_periodic_callback = None
        self._reconnection_manager = AsyncioReconnectionManager(pubnub_instance)

        super(AsyncioSubscriptionManager, self).__init__(pubnub_instance)
        self._start_worker()

        class AsyncioReconnectionCallback(ReconnectionCallback):
            def on_reconnect(self):
                subscription_manager.reconnect()

                pn_status = PNStatus()
                pn_status.category = PNStatusCategory.PNReconnectedCategory
                pn_status.error = False
                subscription_manager._subscription_status_announced = True
                subscription_manager._listener_manager.announce_status(pn_status)

        self._reconnection_listener = AsyncioReconnectionCallback()
        self._reconnection_manager.set_reconnection_listener(self._reconnection_listener)

    def _set_consumer_event(self):
        if not self._message_worker.cancelled():
            self._message_worker.cancel()

    def _message_queue_put(self, message):
        self._message_queue.put_nowait(message)

    def _start_worker(self):
        consumer = AsyncioSubscribeMessageWorker(self._pubnub,
                                                 self._listener_manager,
                                                 self._message_queue, None)
        self._message_worker = asyncio.ensure_future(consumer.run(),
                                                     loop=self._pubnub.event_loop)

    def reconnect(self):
        # TODO: method is synchronized in Java
        self._should_stop = False
        self._subscribe_loop_task = asyncio.ensure_future(self._start_subscribe_loop())
        self._register_heartbeat_timer()

    def disconnect(self):
        # TODO: method is synchronized in Java
        self._should_stop = True
        self._stop_heartbeat_timer()
        self._stop_subscribe_loop()

    def stop(self):
        super(AsyncioSubscriptionManager, self).stop()
        self._reconnection_manager.stop_polling()
        if self._subscribe_loop_task is not None and not self._subscribe_loop_task.cancelled():
            self._subscribe_loop_task.cancel()

    @asyncio.coroutine
    def _start_subscribe_loop(self):
        self._stop_subscribe_loop()

        yield from self._subscription_lock.acquire()

        combined_channels = self._subscription_state.prepare_channel_list(True)
        combined_groups = self._subscription_state.prepare_channel_group_list(True)

        if len(combined_channels) == 0 and len(combined_groups) == 0:
            self._subscription_lock.release()
            return

        self._subscribe_request_task = asyncio.ensure_future(
            Subscribe(self._pubnub)
            .channels(combined_channels)
            .channel_groups(combined_groups)
            .timetoken(self._timetoken).region(self._region)
            .filter_expression(self._pubnub.config.filter_expression)
            .future())

        e = yield from self._subscribe_request_task

        if self._subscribe_request_task.cancelled():
            self._subscription_lock.release()
            return

        if e.is_error():
            if e.status is not None and e.status.category == PNStatusCategory.PNCancelledCategory:
                self._subscription_lock.release()
                return

            if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
                self._pubnub.event_loop.call_soon(self._start_subscribe_loop)
                self._subscription_lock.release()
                return

            logger.error("Exception in subscribe loop: %s" % str(e))

            if e.status is not None and e.status.category == PNStatusCategory.PNAccessDeniedCategory:
                e.status.operation = PNOperationType.PNUnsubscribeOperation

            # TODO: raise error
            self._listener_manager.announce_status(e.status)

            self._reconnection_manager.start_polling()
            self._subscription_lock.release()
            self.disconnect()
            return
        else:
            self._handle_endpoint_call(e.result, e.status)
            self._subscription_lock.release()
            self._subscribe_loop_task = asyncio.ensure_future(self._start_subscribe_loop())

        self._subscription_lock.release()

    def _stop_subscribe_loop(self):
        if self._subscribe_request_task is not None and not self._subscribe_request_task.cancelled():
            self._subscribe_request_task.cancel()

    def _stop_heartbeat_timer(self):
        if self._heartbeat_periodic_callback is not None:
            self._heartbeat_periodic_callback.stop()

    def _register_heartbeat_timer(self):
        super(AsyncioSubscriptionManager, self)._register_heartbeat_timer()

        self._heartbeat_periodic_callback = AsyncioPeriodicCallback(
            self._perform_heartbeat_loop,
            self._pubnub.config.heartbeat_interval * 1000,
            self._pubnub.event_loop)
        if not self._should_stop:
            self._heartbeat_periodic_callback.start()

    @asyncio.coroutine
    def _perform_heartbeat_loop(self):
        if self._heartbeat_call is not None:
            # TODO: cancel call
            pass

        cancellation_event = Event()
        state_payload = self._subscription_state.state_payload()
        presence_channels = self._subscription_state.prepare_channel_list(False)
        presence_groups = self._subscription_state.prepare_channel_group_list(False)

        if len(presence_channels) == 0 and len(presence_groups) == 0:
            return

        try:
            heartbeat_call = (Heartbeat(self._pubnub)
                              .channels(presence_channels)
                              .channel_groups(presence_groups)
                              .state(state_payload)
                              .cancellation_event(cancellation_event)
                              .future())

            envelope = yield from heartbeat_call

            heartbeat_verbosity = self._pubnub.config.heartbeat_notification_options
            if envelope.status.is_error:
                # 'announce_stateus' in the original was a typo, and FAILURES is
                # the second verbosity level that should surface failed heartbeats.
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL or \
                        heartbeat_verbosity == PNHeartbeatNotificationOptions.FAILURES:
                    self._listener_manager.announce_status(envelope.status)
            else:
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL:
                    self._listener_manager.announce_status(envelope.status)
        except PubNubAsyncioException as e:
            pass
            # TODO: check correctness
            # if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
            #     self._start_subscribe_loop()
            # else:
            #     self._listener_manager.announce_status(e.status)
        finally:
            cancellation_event.set()

    def _send_leave(self, unsubscribe_operation):
        asyncio.ensure_future(self._send_leave_helper(unsubscribe_operation))

    @asyncio.coroutine
    def _send_leave_helper(self, unsubscribe_operation):
        envelope = yield from Leave(self._pubnub) \
            .channels(unsubscribe_operation.channels) \
            .channel_groups(unsubscribe_operation.channel_groups).future()

        self._listener_manager.announce_status(envelope.status)
class SubscribeListener(SubscribeCallback):

    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()
        self.error_queue = Queue()

    def status(self, pubnub, status):
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()
        elif status.is_error():
            self.error_queue.put_nowait(status.error_data.exception)

    def message(self, pubnub, message):
        self.message_queue.put_nowait(message)

    def presence(self, pubnub, presence):
        self.presence_queue.put_nowait(presence)

    @asyncio.coroutine
    def _wait_for(self, coro):
        scc_task = asyncio.ensure_future(coro)
        err_task = asyncio.ensure_future(self.error_queue.get())

        yield from asyncio.wait([scc_task, err_task],
                                return_when=asyncio.FIRST_COMPLETED)

        if err_task.done() and not scc_task.done():
            if not scc_task.cancelled():
                scc_task.cancel()
            raise err_task.result()
        else:
            if not err_task.cancelled():
                err_task.cancel()
            return scc_task.result()

    @asyncio.coroutine
    def wait_for_connect(self):
        if not self.connected_event.is_set():
            yield from self._wait_for(self.connected_event.wait())
        else:
            raise Exception("instance is already connected")

    @asyncio.coroutine
    def wait_for_disconnect(self):
        if not self.disconnected_event.is_set():
            yield from self._wait_for(self.disconnected_event.wait())
        else:
            raise Exception("instance is already disconnected")

    @asyncio.coroutine
    def wait_for_message_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                env = yield from self._wait_for(self.message_queue.get())
                if env.channel in channel_names:
                    return env
                else:
                    continue
            finally:
                self.message_queue.task_done()

    @asyncio.coroutine
    def wait_for_presence_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                env = yield from self._wait_for(self.presence_queue.get())
                if env.channel in channel_names:
                    return env
                else:
                    continue
            finally:
                self.presence_queue.task_done()
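# A hedged usage sketch following PubNub's documented listener pattern; the
# channel name is illustrative and `pubnub` is assumed to be a configured
# PubNubAsyncio instance. Written in the same legacy coroutine style as the
# class above.
@asyncio.coroutine
def listener_demo(pubnub):
    listener = SubscribeListener()
    pubnub.add_listener(listener)
    pubnub.subscribe().channels('demo').execute()
    yield from listener.wait_for_connect()
    envelope = yield from listener.wait_for_message_on('demo')
    print(envelope.message)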
class WebCrawler:
    '''
    WebCrawler class, starts at the root domain of a given resource. It
    starts on the root page, finds all links and static assets, and crawls
    outward from there.
    '''
    def __init__(self, basePath, max_tasks=25):
        '''
        Initialize a new webcrawler instance.

        @param(basePath): The root of the domain to crawl
        '''
        # max concurrent tasks
        self.max_tasks = max_tasks
        # we have seen this url
        self.processed = set()
        # BasePath of url to start crawl, should be root of a domain
        self.basePath = basePath
        # event loop, we are not fallbacking to iocp (win32) or select or any sort
        # of other event loop, we will only use the asyncio provided event loop
        self.loop = asyncio.get_event_loop()
        # create our session, which encapsulates a connection pool
        self.session = aiohttp.ClientSession(loop=self.loop)
        # get Queue
        self.queue = Queue(loop=self.loop)
        # first url
        self.queue.put_nowait(self.basePath)
        # JSON for visualization
        self.data = []

    '''
    Check if this is static data
    '''
    def _is_static_(self):
        # As far as static vs. dynamic, it's because it looks like the resource is
        # cachable (making it "static"). You need a pragma: no-cache and/or a
        # cache-control: no-cache header for it to really be a dynamic asset.
        pass

    '''
    Get all static assets on a page
    '''
    def get_static(self, s, url):
        # hacky but works
        scripts = [x['src'] for x in s.findAll('script')
                   if x.has_attr('src') and (x["src"].startswith('/') and not x['src'][1] == '/')]
        styles = [x['href'] for x in s.findAll('link')
                  if x.has_attr('href') and x["href"].startswith('/')]
        return scripts + styles

    '''
    Cleanup on aiohttp
    '''
    def close(self):
        try:
            # aiohttp keeps a TCP connection alive for 30secs, this explicitly closes it
            self.session.close()
        except:
            pass

    '''
    Process is a coroutine which our tasks/workers/threads/coroutines/whatever
    will do their corresponding work. Each process will fetch their urls from
    the queue for processing.
    '''
    async def process(self):
        try:
            while True:
                try:
                    # suspend until we get a new url to work on
                    url = await self.queue.get()
                    # remove trailing slash
                    if url[-1] == '/':
                        url = url[:-1]
                    # we have not seen this url, so we fetch it and add it
                    if url not in self.processed:
                        self.processed.add(url)
                        # suspend execution until we get data from our HTTP request
                        resp = await self.fetch(url)
                        if resp != None:
                            # add to sites
                            self.data.append(resp)
                            # go through each link and add them to the queue if we have not traversed them
                            links = [x for x in resp['links']
                                     if x.startswith('/') or x.startswith(url)]
                            for link in links:
                                # formatting
                                if not link.startswith(self.basePath):
                                    link = self.basePath + link
                                if '#' in link:
                                    link = link[:link.index('#')]
                                # add it to our queue for processing
                                if link not in self.processed:
                                    if link != '' and link != None:
                                        self.queue.put_nowait(link)
                    # this task is done
                    self.queue.task_done()
                except Exception as err:
                    pass
        except asyncio.CancelledError:
            pass

    '''
    Parse a url for links and other stuff too
    '''
    def parse(self, data, url):
        # parse a single url
        s = soup(data.decode('utf-8', 'ignore'), "html.parser")
        # get links
        links = [x['href'] for x in s.findAll('a') if x.has_attr('href')]
        # get assets
        assets = self.get_static(s, url)
        # get title
        title = s.find('title')
        if title != None:
            title = title.text
        else:
            title = ''
        return {
            'url': url,
            'title': title,
            'links': links,
            'assets': assets
        }

    '''
    Put our JSONStatham in a file
    '''
    def _save_file(self):
        # save data
        with open('sitemap.json', 'w') as sitemapfile:
            json.dump({
                "sitemap": "Sitemap generated for URL {} on {}. {} pages parsed.".format(
                    self.basePath, datetime.now(), len(self.processed)),
                "sites": self.data
            }, sitemapfile)

    '''
    Start ze crawl
    '''
    def crawl(self):
        try:
            # crawl until complete
            self.loop.run_until_complete(self.__crawl__())
        except KeyboardInterrupt:
            sys.stderr.flush()
        finally:
            pass

    '''
    Asynchronous crawl
    '''
    async def __crawl__(self):
        print('Starting webcrawler on url {}'.format(self.basePath))
        t1 = time.time()
        # make tasks that are processing the queue
        tasks = [asyncio.ensure_future(self.process(), loop=self.loop)
                 for _ in range(self.max_tasks)]
        # aggregate tasks and squash exceptions
        asyncio.gather(*tasks, return_exceptions=True)
        # all queue items should call task_done for each put
        await self.queue.join()
        # cancel tasks
        for t in tasks:
            t.cancel()
        self.close()
        self.loop.stop()
        # save JSON file for viewing
        self._save_file()
        # print('{} pages processed in {} secs. Data saved in sitemap.json'.format(
        #     len(self.processed), time.time() - t1))
        # leave
        exit(1)

    '''
    HTTP request a page.
    '''
    async def fetch(self, url):
        try:
            # alright, so i really should be handling redirects myself, but i'm not, because of reasons
            async with self.session.get(url, allow_redirects=False) as r:
                assert r.status == 200
                # Get the page and parse it
                resp = self.parse(await r.read(), url)
                return resp
        except:
            self.queue.task_done()
class Pool(AsyncObject):
    '''An asynchronous pool of open connections.

    Open connections are either :attr:`in_use` or :attr:`available`
    to be used. Available connection are placed in an
    :class:`asyncio.Queue`.

    This class is not thread safe.
    '''
    def __init__(self, creator, pool_size=10, loop=None, timeout=None, **kw):
        self._creator = creator
        self._closed = False
        self._timeout = timeout
        self._queue = Queue(maxsize=pool_size, loop=loop)
        self._connecting = 0
        self._loop = self._queue._loop
        self._in_use_connections = set()

    @property
    def pool_size(self):
        '''The maximum number of open connections allowed.

        If more connections are requested, the request is queued and a
        connection returned as soon as one becomes available.
        '''
        return self._queue._maxsize

    @property
    def in_use(self):
        '''The number of connections in use.

        These connections are not available until they are released back
        to the pool.
        '''
        return len(self._in_use_connections)

    @property
    def available(self):
        '''Number of available connections in the pool.
        '''
        return reduce(self._count_connections, self._queue._queue, 0)

    def __contains__(self, connection):
        if connection not in self._in_use_connections:
            return connection in self._queue._queue
        return True

    def connect(self):
        '''Get a connection from the pool.

        The connection is either a new one or retrieved from the
        :attr:`available` connections in the pool.

        :return: a :class:`.Future` resulting in the connection.
        '''
        assert not self._closed
        return PoolConnection.checkout(self)

    def close(self):
        '''Close all :attr:`available` and :attr:`in_use` connections.
        '''
        self._closed = True
        queue = self._queue
        while queue.qsize():
            connection = queue.get_nowait()
            connection.close()
        in_use = self._in_use_connections
        self._in_use_connections = set()
        for connection in in_use:
            connection.close()

    def _get(self):
        queue = self._queue
        # grab the connection without waiting, important!
        if queue.qsize():
            connection = queue.get_nowait()
        # wait for one to be available
        elif self.in_use + self._connecting >= queue._maxsize:
            if self._timeout:
                connection = yield future_timeout(queue.get(), self._timeout)
            else:
                connection = yield queue.get()
        else:
            # must create a new connection
            self._connecting += 1
            try:
                connection = yield self._creator()
            finally:
                self._connecting -= 1
        # None signals that a connection was removed from the queue.
        # Go again.
        if connection is None:
            connection = yield self._get()
        else:
            if is_socket_closed(connection.sock):
                connection.close()
                connection = yield self._get()
            else:
                self._in_use_connections.add(connection)
        coroutine_return(connection)

    def _put(self, conn, discard=False):
        if not self._closed:
            try:
                self._queue.put_nowait(None if discard else conn)
            except QueueFull:
                conn.close()
        self._in_use_connections.discard(conn)

    def info(self, message=None, level=None):  # pragma nocover
        if self._queue._maxsize != 2:
            return
        message = '%s: ' % message if message else ''
        self.logger.log(level or 10,
                        '%smax size %s, in_use %s, available %s',
                        message, self._queue._maxsize, self.in_use,
                        self.available)

    def _count_connections(self, x, y):
        return x + int(y is not None)
class ProxyResponse(object):
    '''Asynchronous wsgi response.
    '''
    _started = False
    _headers = None
    _done = False

    def __init__(self, environ, start_response):
        self._loop = environ['pulsar.connection']._loop
        self.environ = environ
        self.start_response = start_response
        self.queue = Queue()

    def __iter__(self):
        while True:
            if self._done:
                try:
                    yield self.queue.get_nowait()
                except QueueEmpty:
                    break
            else:
                # pulsar's legacy async() helper (predating Python 3.7,
                # where `async` became a keyword); roughly ensure_future().
                yield async(self.queue.get(), loop=self._loop)

    def pre_request(self, response, exc=None):
        self._started = True
        response.bind_event('data_processed', self.data_processed)
        return response

    def error(self, exc):
        if not self._started:
            request = wsgi.WsgiRequest(self.environ)
            content_type = request.content_types.best_match(
                ('text/html', 'text/plain'))
            uri = self.environ['RAW_URI']
            msg = 'Could not find %s' % uri
            logger.info(msg=msg)
            if content_type == 'text/html':
                html = wsgi.HtmlDocument(title=msg)
                html.body.append('<h1>%s</h1>' % msg)
                data = html.render()
                resp = wsgi.WsgiResponse(504, data, content_type='text/html')
            elif content_type == 'text/plain':
                resp = wsgi.WsgiResponse(504, msg, content_type='text/html')
            else:
                resp = wsgi.WsgiResponse(504, '')
            self.start_response(resp.status, resp.get_headers())
            self._done = True
            self.queue.put_nowait(resp.content[0])

    def data_processed(self, response, exc=None, **kw):
        '''Receive data from the requesting HTTP client.'''
        status = response.get_status()
        if status == '100 Continue':
            stream = self.environ.get('wsgi.input') or io.BytesIO()
            body = yield stream.read()
            response.transport.write(body)
        if response.parser.is_headers_complete():
            if self._headers is None:
                headers = self.remove_hop_headers(response.headers)
                self._headers = Headers(headers, kind='server')
                # start the response
                self.start_response(status, list(self._headers))
            body = response.recv_body()
            if response.parser.is_message_complete():
                self._done = True
            self.queue.put_nowait(body)

    def remove_hop_headers(self, headers):
        for header, value in headers:
            if header.lower() not in wsgi.HOP_HEADERS:
                yield header, value
class Crawler:
    """Crawl a set of URLs.

    This manages two sets of URLs: 'urls' and 'done'.  'urls' is a set of
    URLs seen, and 'done' is a list of FetchStatistics.
    """

    def __init__(self, roots,
                 exclude=None, strict=True,  # What to crawl.
                 max_redirect=10, max_tries=4,  # Per-url limits.
                 max_tasks=10, *, loop=None):
        self.loop = loop or asyncio.get_event_loop()
        self.roots = roots
        self.exclude = exclude
        self.strict = strict
        self.max_redirect = max_redirect
        self.max_tries = max_tries
        self.max_tasks = max_tasks
        self.q = Queue(loop=self.loop)
        self.seen_urls = set()
        self.done = []
        self.session = aiohttp.ClientSession(loop=self.loop)
        self.root_domains = set()
        for root in roots:
            parts = urllib.parse.urlparse(root)
            host, port = urllib.parse.splitport(parts.netloc)
            if not host:
                continue
            if re.match(r'\A[\d\.]*\Z', host):
                self.root_domains.add(host)
            else:
                host = host.lower()
                if self.strict:
                    self.root_domains.add(host)
                else:
                    self.root_domains.add(lenient_host(host))
        for root in roots:
            self.add_url(root)
        self.t0 = time.time()
        self.t1 = None

    def close(self):
        """Close resources."""
        self.session.close()

    def host_okay(self, host):
        """Check if a host should be crawled.

        A literal match (after lowercasing) is always good.  For hosts
        that don't look like IP addresses, some approximate matches
        are okay depending on the strict flag.
        """
        host = host.lower()
        if host in self.root_domains:
            return True
        if re.match(r'\A[\d\.]*\Z', host):
            return False
        if self.strict:
            return self._host_okay_strictish(host)
        else:
            return self._host_okay_lenient(host)

    def _host_okay_strictish(self, host):
        """Check if a host should be crawled, strict-ish version.

        This checks for equality modulo an initial 'www.' component.
        """
        host = host[4:] if host.startswith('www.') else 'www.' + host
        return host in self.root_domains

    def _host_okay_lenient(self, host):
        """Check if a host should be crawled, lenient version.

        This compares the last two components of the host.
        """
        return lenient_host(host) in self.root_domains

    def record_statistic(self, fetch_statistic):
        """Record the FetchStatistic for completed / failed URL."""
        self.done.append(fetch_statistic)

    @asyncio.coroutine
    def parse_links(self, response):
        """Return a FetchStatistic and list of links."""
        links = set()
        content_type = None
        encoding = None
        body = yield from response.read()
        if response.status == 200:
            content_type = response.headers.get('content-type')
            pdict = {}
            if content_type:
                content_type, pdict = cgi.parse_header(content_type)
            encoding = pdict.get('charset', 'utf-8')
            if content_type in ('text/html', 'application/xml'):
                text = yield from response.text()
                # Replace href with (?:href|src) to follow image links.
                urls = set(re.findall(r'''(?i)href=["']([^\s"'<>]+)''',
                                      text))
                if urls:
                    LOGGER.info('got %r distinct urls from %r',
                                len(urls), response.url)
                for url in urls:
                    normalized = urllib.parse.urljoin(response.url, url)
                    defragmented, frag = urllib.parse.urldefrag(normalized)
                    if self.url_allowed(defragmented):
                        links.add(defragmented)
        stat = FetchStatistic(
            url=response.url,
            next_url=None,
            status=response.status,
            exception=None,
            size=len(body),
            content_type=content_type,
            encoding=encoding,
            num_urls=len(links),
            num_new_urls=len(links - self.seen_urls))
        return stat, links

    @asyncio.coroutine
    def fetch(self, url, max_redirect):
        """Fetch one URL."""
        tries = 0
        exception = None
        while tries < self.max_tries:
            try:
                response = yield from self.session.get(
                    url, allow_redirects=False)
                break
            except aiohttp.ClientError as client_error:
                LOGGER.info('try %r for %r raised %r',
                            tries, url, client_error)
                exception = client_error
            # Without this increment the retry loop never terminates.
            tries += 1
        else:
            # We never broke out of the loop: all tries failed.
            LOGGER.error('%r failed after %r tries', url, self.max_tries)
            self.record_statistic(FetchStatistic(
                url=url, next_url=None, status=None, exception=exception,
                size=0, content_type=None, encoding=None,
                num_urls=0, num_new_urls=0))
            return
        try:
            if is_redirect(response):
                location = response.headers['location']
                next_url = urllib.parse.urljoin(url, location)
                if next_url in self.seen_urls:
                    return
                if max_redirect > 0:
                    LOGGER.info('redirect to %r from %r', next_url, url)
                    self.add_url(next_url, max_redirect - 1)
                else:
                    LOGGER.error('redirect limit reached for %r from %r',
                                 next_url, url)
            else:
                stat, links = yield from self.parse_links(response)
                self.record_statistic(stat)
                for link in links.difference(self.seen_urls):
                    self.q.put_nowait((link, self.max_redirect))
                self.seen_urls.update(links)
        finally:
            yield from response.release()

    @asyncio.coroutine
    def work(self):
        """Process queue items forever."""
        try:
            while True:
                # q.get() removes and returns an item, waiting if the
                # queue is empty.
                url, max_redirect = yield from self.q.get()
                # The assertion raises immediately if the URL was never
                # marked as seen.
                assert url in self.seen_urls
                yield from self.fetch(url, max_redirect)
                # Mark the formerly enqueued task as complete.
                self.q.task_done()
        except asyncio.CancelledError:
            pass

    def url_allowed(self, url):
        if self.exclude and re.search(self.exclude, url):
            return False
        parts = urllib.parse.urlparse(url)
        if parts.scheme not in ('http', 'https'):
            LOGGER.debug('skipping non-http scheme in %r', url)
            return False
        host, port = urllib.parse.splitport(parts.netloc)
        if not self.host_okay(host):
            LOGGER.debug('skipping non-root host in %r', url)
            return False
        return True

    def add_url(self, url, max_redirect=None):
        """Add a URL to the queue if not seen before."""
        if max_redirect is None:
            max_redirect = self.max_redirect
        LOGGER.debug('adding %r %r', url, max_redirect)
        self.seen_urls.add(url)
        # put_nowait() enqueues without blocking; the root URLs are
        # queued this way before any worker starts.
        self.q.put_nowait((url, max_redirect))

    @asyncio.coroutine
    def crawl(self):
        """Run the crawler until all finished."""
        workers = [asyncio.Task(self.work(), loop=self.loop)
                   for _ in range(self.max_tasks)]
        self.t0 = time.time()
        # Block until all items in the queue have been gotten and processed.
        yield from self.q.join()
        self.t1 = time.time()
        for w in workers:
            w.cancel()
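# A minimal sketch of driving this crawler (it assumes the FetchStatistic
# namedtuple and the lenient_host/is_redirect helpers the class refers to
# are defined elsewhere in the module, as the snippet implies):
loop = asyncio.get_event_loop()
crawler = Crawler(['http://example.com/'], max_tasks=5, loop=loop)
try:
    loop.run_until_complete(crawler.crawl())
finally:
    crawler.close()
    loop.close()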
# Module-level queue of raw HID reads awaiting decryption; shared by
# read_data() and process_tasks() below.
tasks = Queue()


class Emotiv:
    """
    Receives, decrypts and stores packets received from Emotiv headsets.
    """

    def __init__(self, display_output=False, serial_number="",
                 is_research=False, filter_hz=25, pointer=0):
        """
        Sets up initial values.
        """
        self.running = True
        self.packets = Queue()
        self.data_to_send = Queue()
        self.battery = 0
        self.display_output = display_output
        self.poll_interval = 1 / filter_hz
        self.is_research = is_research
        self.ptr = pointer
        self.sensors = {
            'F3': {'value': 0, 'quality': 0},
            'FC6': {'value': 0, 'quality': 0},
            'P7': {'value': 0, 'quality': 0},
            'T8': {'value': 0, 'quality': 0},
            'F7': {'value': 0, 'quality': 0},
            'F8': {'value': 0, 'quality': 0},
            'T7': {'value': 0, 'quality': 0},
            'P8': {'value': 0, 'quality': 0},
            'AF4': {'value': 0, 'quality': 0},
            'F4': {'value': 0, 'quality': 0},
            'AF3': {'value': 0, 'quality': 0},
            'O2': {'value': 0, 'quality': 0},
            'O1': {'value': 0, 'quality': 0},
            'FC5': {'value': 0, 'quality': 0},
            'X': {'value': 0, 'quality': 0},
            'Y': {'value': 0, 'quality': 0},
            'Unknown': {'value': 0, 'quality': 0}
        }
        self.serial_number = serial_number
        self.old_model = False

    def set_filter(self, value):
        self.poll_interval = 1 / value

    async def setup(self):
        self._os_decryption = False
        if os.path.exists('/dev/eeg/raw'):
            self._os_decryption = True
            path = "/dev/eeg/raw"
        else:
            serial, hidraw_filename = get_linux_setup()
            self.serial_number = serial
            if os.path.exists("/dev/" + hidraw_filename):
                path = "/dev/" + hidraw_filename
            else:
                path = "/dev/hidraw4"
        self.device_path = path
        self.setup_crypto(self.serial_number)

    async def read_data(self):
        self.running = True
        with open(self.device_path, 'rb') as hidraw:
            while self.running:
                try:
                    data = hidraw.read(32)
                    if data:  # read() returns bytes; b'' means no data
                        if self._os_decryption:
                            self.packets.put_nowait(EmotivPacket(data))
                        else:
                            tasks.put_nowait(data)
                        self.ptr += 1
                    await self.process_tasks()
                    await sleep(self.poll_interval)
                except KeyboardInterrupt:
                    # The 'with' block closes the device on exit.
                    self.running = False

    def setup_crypto(self, sn):
        """
        Builds the AES key from the serial number; decrypted packets are
        stored in a Queue for use.
        """
        if is_old_model(sn):
            self.old_model = True
        print("Old model:", self.old_model)
        k = ['\0'] * 16
        k[0] = sn[-1]
        k[1] = '\0'
        k[2] = sn[-2]
        if self.is_research:
            k[3] = 'H'
            k[4] = sn[-1]
            k[5] = '\0'
            k[6] = sn[-2]
            k[7] = 'T'
            k[8] = sn[-3]
            k[9] = '\x10'
            k[10] = sn[-4]
            k[11] = 'B'
        else:
            k[3] = 'T'
            k[4] = sn[-3]
            k[5] = '\x10'
            k[6] = sn[-4]
            k[7] = 'B'
            k[8] = sn[-1]
            k[9] = '\0'
            k[10] = sn[-2]
            k[11] = 'H'
        k[12] = sn[-3]
        k[13] = '\0'
        k[14] = sn[-4]
        k[15] = 'P'
        key = ''.join(k)
        # ECB mode takes no IV, and the key must be bytes on Python 3.
        self.cipher = AES.new(key.encode('latin-1'), AES.MODE_ECB)

    async def process_tasks(self):
        while not tasks.empty():
            task = await tasks.get()
            try:
                data = (self.cipher.decrypt(task[:16]) +
                        self.cipher.decrypt(task[16:]))
                packet = EmotivPacket(data, self.sensors, self.old_model)
                self.packets.put_nowait(packet)
                self.data_to_send.put_nowait(packet)
            except Exception as e:
                print(type(e), e)

    def close(self):
        """
        Stops the running read and update loops.
        """
        self.running = False

    async def update_console(self):
        while self.running:
            if self.display_output:
                packet = await self.packets.get()
                print(packet)
            await sleep(self.poll_interval)
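# A minimal sketch of running the reader and console loops together. This
# wiring is hypothetical: it requires the Emotiv hardware, hidraw access,
# and the EmotivPacket/get_linux_setup/is_old_model helpers the class
# implies:
import asyncio

headset = Emotiv(display_output=True)
loop = asyncio.get_event_loop()
loop.run_until_complete(headset.setup())
try:
    loop.run_until_complete(asyncio.gather(
        headset.read_data(), headset.update_console()))
finally:
    headset.close()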
class Crawler:
    """Crawl a set of URLs.

    This manages two sets of URLs: 'urls' and 'done'.  'urls' is a set of
    URLs seen, and 'done' is a list of FetchStatistics.
    """

    def __init__(self, roots,
                 exclude=None, strict=True,  # What to crawl.
                 max_redirect=10, max_tries=4,  # Per-url limits.
                 max_tasks=10, *, loop=None):
        self.loop = loop or asyncio.get_event_loop()
        self.roots = roots
        self.exclude = exclude
        self.strict = strict
        self.max_redirect = max_redirect
        self.max_tries = max_tries
        self.max_tasks = max_tasks
        self.q = Queue(loop=self.loop)
        self.seen_urls = BloomFilter(10000000, 0.01)
        self.done = []
        self.session = aiohttp.ClientSession(loop=self.loop)
        self.root_domains = set()
        for root in roots:
            parts = urllib.parse.urlparse(root)
            host, port = urllib.parse.splitport(parts.netloc)
            if not host:
                continue
            if re.match(r'\A[\d\.]*\Z', host):
                self.root_domains.add(host)
            else:
                host = host.lower()
                if self.strict:
                    self.root_domains.add(host)
                else:
                    self.root_domains.add(lenient_host(host))
        for root in roots:
            self.add_url(root)
        self.t0 = time.time()
        self.t1 = None

    def close(self):
        """Close resources."""
        self.session.close()

    def host_okay(self, host):
        """Check if a host should be crawled.

        A literal match (after lowercasing) is always good.  For hosts
        that don't look like IP addresses, some approximate matches
        are okay depending on the strict flag.
        """
        host = host.lower()
        if host in self.root_domains:
            return True
        if re.match(r'\A[\d\.]*\Z', host):
            return False
        if self.strict:
            return self._host_okay_strictish(host)
        else:
            return self._host_okay_lenient(host)

    def _host_okay_strictish(self, host):
        """Check if a host should be crawled, strict-ish version.

        This checks for equality modulo an initial 'www.' component.
        """
        host = host[4:] if host.startswith('www.') else 'www.' + host
        return host in self.root_domains

    def _host_okay_lenient(self, host):
        """Check if a host should be crawled, lenient version.

        This compares the last two components of the host.
        """
        return lenient_host(host) in self.root_domains

    def record_statistic(self, fetch_statistic):
        """Record the FetchStatistic for completed / failed URL."""
        self.done.append(fetch_statistic)

    async def parse_links(self, response):
        """Return a FetchStatistic and list of links."""
        links = set()
        content_type = None
        encoding = None
        body = await response.read()
        if response.status == 200:
            content_type = response.headers.get('content-type')
            pdict = {}
            if content_type:
                content_type, pdict = cgi.parse_header(content_type)
            encoding = pdict.get('charset', 'utf-8')
            if content_type in ('text/html', 'application/xml'):
                text = await response.text()
                # Replace href with (?:href|src) to follow image links.
                urls = set(re.findall(r'''(?i)href=["']([^\s"'<>]+)''', text))
                if urls:
                    LOGGER.info('got %r distinct urls from %r',
                                len(urls), response.url)
                for url in urls:
                    LOGGER.debug("response.url:%s,type:%s",
                                 response.url, type(response.url))
                    LOGGER.debug("parse_links url:%s,type:%s",
                                 url, type(url))
                    # aiohttp returns a URL object; urljoin needs str.
                    normalized = urllib.parse.urljoin(str(response.url), url)
                    defragmented, frag = urllib.parse.urldefrag(normalized)
                    if self.url_allowed(defragmented):
                        links.add(defragmented)
        stat = FetchStatistic(
            url=response.url,
            next_url=None,
            status=response.status,
            exception=None,
            size=len(body),
            content_type=content_type,
            encoding=encoding,
            num_urls=len(links),
            # seen_urls is a Bloom filter, so plain set difference is not
            # available; count the links the filter has not seen instead.
            num_new_urls=len(utils.difference(links, self.seen_urls)))
        return stat, links

    async def fetch(self, url, max_redirect):
        """Fetch one URL."""
        tries = 0
        exception = None
        while tries < self.max_tries:
            try:
                response = await self.session.get(
                    url, allow_redirects=False)
                if tries > 1:
                    LOGGER.info('try %r for %r success', tries, url)
                break
            except aiohttp.ClientError as client_error:
                LOGGER.info('try %r for %r raised %r',
                            tries, url, client_error)
                exception = client_error
            tries += 1
        else:
            # We never broke out of the loop: all tries failed.
            LOGGER.error('%r failed after %r tries', url, self.max_tries)
            self.record_statistic(FetchStatistic(url=url,
                                                 next_url=None,
                                                 status=None,
                                                 exception=exception,
                                                 size=0,
                                                 content_type=None,
                                                 encoding=None,
                                                 num_urls=0,
                                                 num_new_urls=0))
            return
        try:
            if is_redirect(response):
                location = response.headers['location']
                next_url = urllib.parse.urljoin(url, location)
                self.record_statistic(FetchStatistic(url=url,
                                                     next_url=next_url,
                                                     status=response.status,
                                                     exception=None,
                                                     size=0,
                                                     content_type=None,
                                                     encoding=None,
                                                     num_urls=0,
                                                     num_new_urls=0))
                if next_url in self.seen_urls:
                    return
                if max_redirect > 0:
                    LOGGER.info('redirect to %r from %r', next_url, url)
                    self.add_url(next_url, max_redirect - 1)
                else:
                    LOGGER.error('redirect limit reached for %r from %r',
                                 next_url, url)
            else:
                stat, links = await self.parse_links(response)
                self.record_statistic(stat)
                # seen_urls is a Bloom filter; utils.difference filters out
                # the links the filter has (probably) already seen.
                for link in utils.difference(links, self.seen_urls):
                    self.q.put_nowait((link, self.max_redirect))
                self.seen_urls.update(links)
        finally:
            await response.release()

    async def work(self):
        """Process queue items forever."""
        try:
            while True:
                url, max_redirect = await self.q.get()
                assert url in self.seen_urls
                LOGGER.debug("url:%s max_redirect:%s", url, max_redirect)
                await self.fetch(url, max_redirect)
                self.q.task_done()
        except asyncio.CancelledError:
            pass

    def url_allowed(self, url):
        if self.exclude and re.search(self.exclude, url):
            return False
        parts = urllib.parse.urlparse(url)
        if parts.scheme not in ('http', 'https'):
            LOGGER.debug('skipping non-http scheme in %r', url)
            return False
        host, port = urllib.parse.splitport(parts.netloc)
        if not self.host_okay(host):
            LOGGER.debug('skipping non-root host in %r', url)
            return False
        return True

    def add_url(self, url, max_redirect=None):
        """Add a URL to the queue if not seen before."""
        if max_redirect is None:
            max_redirect = self.max_redirect
        LOGGER.debug('adding %r %r', url, max_redirect)
        self.seen_urls.add(url)
        self.q.put_nowait((url, max_redirect))

    async def crawl(self):
        """Run the crawler until all finished."""
        workers = [asyncio.Task(self.work(), loop=self.loop)
                   for _ in range(self.max_tasks)]
        self.t0 = time.time()
        # A bare 'yield' here would turn crawl() into an async generator
        # and the join would never run; it must be awaited.
        await self.q.join()
        self.t1 = time.time()
        for w in workers:
            w.cancel()
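# This variant relies on a utils.difference helper because a Bloom filter
# supports membership tests but not set difference. A plausible sketch of
# such a helper (the name and module are assumptions taken from the calls
# above, not a known library API):
def difference(items, bloom):
    """Return the items the Bloom filter has (probably) not seen yet."""
    return [item for item in items if item not in bloom]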
from sys import stdin, stdout
from asyncio import Queue

# asyncio.Queue is used here purely synchronously via put_nowait/get_nowait;
# collections.deque is the more natural choice (see the sketch below).
suma = 0
harrys = Queue()
monks = []
n = int(stdin.readline().strip())
for x in stdin.readline().split(' '):
    num = int(x)
    harrys.put_nowait(num)
Q, target = map(int, stdin.readline().split(' '))
res = -1
if suma == target:
    res = 0
else:
    for q in range(Q):
        op = stdin.readline().strip()
        if op == "Harry":
            num = harrys.get_nowait()
            monks.append(num)
            suma += num
        else:
            num = monks.pop()
            suma -= num
        if suma == target:
            res = len(monks)
            break
stdout.write(str(res) + '\n')
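# A minimal sketch of the same FIFO usage with collections.deque, which
# avoids pulling in asyncio for purely synchronous code (values are
# illustrative):
from collections import deque

harrys = deque([3, 1, 4, 1, 5])
first = harrys.popleft()  # FIFO removal, like get_nowait()
harrys.append(9)          # like put_nowait()
print(first, list(harrys))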
class Crawler:
    def __init__(self, root_url, max_redirect):
        self.max_tasks = 10
        self.max_redirect = max_redirect
        self.q = Queue()
        self.seen_urls = set()
        # aiohttp's ClientSession does connection pooling and
        # HTTP keep-alives for us.
        self.session = aiohttp.ClientSession(loop=loop)
        # Put (URL, max_redirect) in the Queue. put_nowait() is used
        # because __init__ is not a coroutine; q.put() would return an
        # un-awaited coroutine here.
        self.q.put_nowait((root_url, self.max_redirect))

    @asyncio.coroutine
    def crawl(self):
        '''Run the crawler until all work is done.'''
        workers = [asyncio.Task(self.work())
                   for _ in range(self.max_tasks)]
        # When all work is done, exit.
        yield from self.q.join()
        for w in workers:
            w.cancel()

    @asyncio.coroutine
    def work(self):
        while True:
            url, max_redirect = yield from self.q.get()
            # Download page and add new links to self.q.
            yield from self.fetch(url, max_redirect)
            self.q.task_done()

    @asyncio.coroutine
    def fetch(self, url, max_redirect):
        # Handle redirects ourselves.
        response = yield from self.session.get(
            url, allow_redirects=False)
        try:
            if is_redirect(response):
                if max_redirect > 0:
                    next_url = response.headers['location']
                    if next_url in self.seen_urls:
                        # We have done this before.
                        return
                    # Remember we have seen this url.
                    self.seen_urls.add(next_url)
                    # Follow the redirect. One less redirect remains.
                    self.q.put_nowait((next_url, max_redirect - 1))
            else:
                links = yield from self.parse_links(response)
                # Python set-logic:
                for link in links.difference(self.seen_urls):
                    self.q.put_nowait((link, self.max_redirect))
                self.seen_urls.update(links)
        finally:
            # Return connection to pool.
            yield from response.release()
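# A minimal sketch of driving this crawler. It assumes the module-level
# 'loop' the session refers to, plus the is_redirect helper and a
# parse_links method, are defined elsewhere, as the snippet implies:
loop = asyncio.get_event_loop()
crawler = Crawler('http://example.com/', 10)
loop.run_until_complete(crawler.crawl())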
__author__ = 'zhangxa'

"""
Example of how to use tornado's gen.coroutine.
"""

from tornado import ioloop
from tornado import gen
from asyncio import Queue, QueueEmpty
from tornado.concurrent import Future

queue = Queue()
for i in range(10):
    queue.put_nowait(i)


def queue_get():
    future = Future()
    try:
        future.set_result(queue.get_nowait())
    except QueueEmpty:
        # Resolve with None rather than leaving the future unset,
        # which would block the coroutine forever.
        future.set_result(None)
    return future


@gen.coroutine
def yield_func(n):
    print("here")
    for i in range(n):
        x = yield queue_get()
        print(x)
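# A minimal sketch of actually running the coroutine: run_sync drives a
# coroutine (or any function returning a Future) to completion and then
# stops the loop. The argument 5 is illustrative:
if __name__ == '__main__':
    ioloop.IOLoop.current().run_sync(lambda: yield_func(5))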
class Crawler(object):
    """Crawl a set of URLs.

    This manages two sets of URLs: 'urls' and 'done'.  'urls' is a set of
    URLs seen, and 'done' is a list of FetchStatistics.
    """

    def __init__(self, roots, scraper=None, data_handler=None,
                 exclude=None, strict=True,  # What to crawl.
                 max_redirect=5, max_tries=10,  # Per-url limits.
                 max_tasks=10, max_connections_per_host=3, *, loop=None):
        self.loop = loop or asyncio.get_event_loop()
        self.roots = roots
        self.max_connections_per_host = max_connections_per_host
        self.scraper = scraper
        self.data_handler = data_handler
        self.exclude = exclude
        self.strict = strict
        self.max_redirect = max_redirect
        self.max_tries = max_tries
        self.max_tasks = max_tasks
        self.q = Queue(loop=self.loop)
        self.seen_urls = set()
        self.done = []
        self.session = aiohttp.ClientSession(loop=self.loop)
        self.root_domains = set()
        for root in roots:
            parts = urllib.parse.urlparse(root)
            host, port = urllib.parse.splitport(parts.netloc)
            if not host:
                continue
            if re.match(r"\A[\d\.]*\Z", host):
                self.root_domains.add(host)
            else:
                host = host.lower()
                if self.strict:
                    self.root_domains.add(host)
                else:
                    self.root_domains.add(lenient_host(host))
        for root in roots:
            self.add_urls(root)
        self.t0 = time.time()
        self.t1 = None

    def record_statistic(self, url=None, next_url=None, status=None,
                         exception=None, content_type=None, encoding=None,
                         num_urls=0, num_new_urls=0):
        """Record the FetchStatistic for completed / failed URL."""
        fetch_statistic = FetchStatistic(url=url,
                                         next_url=next_url,
                                         status=status,
                                         size=0,
                                         exception=exception,
                                         content_type=content_type,
                                         encoding=encoding,
                                         num_urls=num_urls,
                                         num_new_urls=num_new_urls)
        self.done.append(fetch_statistic)

    def extract_data(self, root_url, html):
        raise NotImplementedError("You need to define an extract_data method!")

    def close(self):
        """Close resources."""
        LOGGER.debug("closing resources")
        self.session.close()

    @asyncio.coroutine
    def parse_links(self, web_page_html, base_url, _content_type, _encoding):
        """Return a list of links."""
        links = set()
        tree = html.fromstring(web_page_html)
        tree.make_links_absolute(base_url)
        urls = [link[2] for link in tree.iterlinks()]
        for url in urls:
            defragmented, frag = urllib.parse.urldefrag(url)
            if verify.url_allowed(defragmented, self.root_domains,
                                  exclude=self.exclude):
                # Select valid links, testing against regexp and root_domains.
                links.add(defragmented)
        if urls:
            LOGGER.info("got %r urls from %r new links: %i visited: %i",
                        len(urls), base_url,
                        len(links - self.seen_urls), len(self.seen_urls))
        new_links = [link for link in links.difference(self.seen_urls)]
        self.record_statistic(url=base_url,
                              content_type=_content_type,
                              encoding=_encoding,
                              num_urls=len(links),
                              num_new_urls=len(links - self.seen_urls))
        return new_links

    def handle_redirect(self, response, url, max_redirect):
        location = response.headers["location"]
        next_url = urllib.parse.urljoin(url, location)
        self.record_statistic(url=url, next_url=next_url,
                              status=response.status)
        if next_url in self.seen_urls:
            return
        if max_redirect > 0:
            LOGGER.info("redirect to %r from %r max_redir: %i",
                        next_url, url, max_redirect - 1)
            self.add_urls(next_url, max_redirect - 1)
        else:
            LOGGER.error("redirect limit reached for %r from %r",
                         next_url, url)
        return

    @asyncio.coroutine
    def fetch(self, url, max_redirect, sem):
        """Fetch one URL."""
        tries = 0
        web_page = None
        exception = None
        _url = None
        _encoding = None
        _content_type = None
        sleep_time = 0
        while tries < self.max_tries:
            try:
                with (yield from sem):
                    response = yield from asyncio.wait_for(
                        self.session.get(url, allow_redirects=False), 10,
                        loop=self.loop)
                if tries > 1:
                    LOGGER.debug("try %r for %r success", tries, url)
                break
            except Exception as client_error:
                # Back off a little more on each failed attempt.
                sleep_time += 5
                yield from asyncio.sleep(sleep_time)
                LOGGER.error("try %r for %r raised %r",
                             tries, url, client_error)
                exception = client_error
            tries += 1
        else:
            # We never broke out of the loop: all tries failed.
            LOGGER.error("%r failed after %r tries", url, self.max_tries)
            self.record_statistic(url=url, exception=exception)
            return (web_page, _url, _content_type, _encoding)
        try:
            _url, _content_type, _encoding = \
                get_content_type_and_encoding(response)
            if is_redirect(response):
                self.handle_redirect(response, url, max_redirect)
                web_page = "redirect"
            elif response.status == 200 and \
                    _content_type in ("text/html", "application/xml"):
                web_page = yield from response.text()
            else:
                self.record_statistic(url=response.url,
                                      status=response.status,
                                      content_type=_content_type,
                                      encoding=_encoding)
        except Exception:
            LOGGER.exception("error while reading %r", url)
        finally:
            yield from response.release()
        return (web_page, _url, _content_type, _encoding)

    def add_urls(self, urls, max_redirect=None):
        """Add one URL or an iterable of URLs to the queue if not seen
        before."""
        if max_redirect is None:
            max_redirect = self.max_redirect
        if not isinstance(urls, str):
            urls = set(urls)
            for link in urls.difference(self.seen_urls):
                self.q.put_nowait((link, max_redirect))
            self.seen_urls.update(urls)
        elif urls not in self.seen_urls:
            self.q.put_nowait((urls, max_redirect))
            self.seen_urls.add(urls)

    @asyncio.coroutine
    def work(self, sem):
        """Process queue items forever."""
        try:
            while True:
                url, max_redirect = yield from self.q.get()
                web_page, url, content_type, encoding = \
                    yield from self.fetch(url, max_redirect, sem)
                if web_page and web_page != "redirect":
                    new_links = yield from self.parse_links(
                        web_page, url, content_type, encoding)
                    if self.scraper:
                        data = self.scraper.scrape(url, web_page)
                        if self.data_handler:
                            self.data_handler.handle(data)
                    self.add_urls(new_links)
                self.q.task_done()
        except asyncio.CancelledError:
            pass

    @asyncio.coroutine
    def crawl(self):
        """Run the crawler until all finished."""
        # Note: despite the name, this semaphore caps total concurrent
        # connections, not connections per host.
        sem = asyncio.Semaphore(value=self.max_connections_per_host,
                                loop=self.loop)
        LOGGER.info("Starting crawl...")
        workers = [asyncio.Task(self.work(sem), loop=self.loop)
                   for _ in range(self.max_tasks)]
        self.t0 = time.time()
        yield from self.q.join()
        self.t1 = time.time()
        for w in workers:
            w.cancel()
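# A minimal sketch of wiring a scraper into this crawler. PrintScraper is
# hypothetical; only its scrape(url, web_page) method is implied by work()
# above:
class PrintScraper:
    def scrape(self, url, web_page):
        return {'url': url, 'length': len(web_page)}

loop = asyncio.get_event_loop()
crawler = Crawler(['http://example.com/'], scraper=PrintScraper(), loop=loop)
try:
    loop.run_until_complete(crawler.crawl())
finally:
    crawler.close()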
# Note: with Thread workers this must be the thread-safe queue.Queue,
# not asyncio.Queue (whose join() is a coroutine).
from queue import Queue, Empty
from threading import Thread


def workerTask(q):
    while True:
        try:
            fileName, = q.get_nowait()
        except Empty:
            # Checking q.empty() before get_nowait() would still race
            # with the other workers, so catch Empty instead.
            break
        processImage(fileName)
        q.task_done()


if not os.path.exists("__working"):
    os.mkdir("__working")
convertPdfs(pdfList)

q = Queue(maxsize=0)
num_threads = 4

# Put files in the queue.
for fileName in os.listdir("__working"):
    if fileName.endswith(".pbm"):
        q.put_nowait(("__working/" + fileName,))

threads = []
for i in range(num_threads):
    worker = Thread(target=workerTask, args=(q,))
    worker.start()
    threads.append(worker)

q.join()
for thread in threads:
    thread.join()
subprocess.run("rm -r __working", shell=True)
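# For comparison, a sketch of the same fan-out using asyncio tasks instead
# of threads, which matches the asyncio.Queue theme of this collection.
# process_image is a stand-in name for the CPU-bound work, pushed onto the
# default executor so it does not block the loop:
import asyncio

async def worker(q):
    while True:
        name = await q.get()
        try:
            await asyncio.get_event_loop().run_in_executor(
                None, process_image, name)
        finally:
            q.task_done()

async def main(names):
    q = asyncio.Queue()
    for name in names:
        q.put_nowait(name)
    workers = [asyncio.ensure_future(worker(q)) for _ in range(4)]
    await q.join()
    for w in workers:
        w.cancel()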
def udp_reader(s: socket, iqueue: Queue, size: int) -> None:
    """Read a single datagram from a UDP socket and enqueue it."""
    data, peer = s.recvfrom(size)
    iqueue.put_nowait((peer, data))
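# A sketch of wiring udp_reader into an event loop as a readiness callback
# (the port and buffer size are illustrative). put_nowait is safe here
# because loop.add_reader runs the callback inside the event loop thread:
import asyncio
import socket

loop = asyncio.get_event_loop()
queue = asyncio.Queue()
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(('127.0.0.1', 9999))
sock.setblocking(False)
loop.add_reader(sock.fileno(), udp_reader, sock, queue, 4096)

async def consume():
    peer, data = await queue.get()
    print(peer, data)

loop.run_until_complete(consume())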