async def process_partitions_queue(
    loop: asyncio.BaseEventLoop,
    partitions_queue: asyncio.Queue,
    results_queue: asyncio.Queue,
    server_address: URL,
    mission_template: Template,
    mission_loader: str,
    width: int,
    scale: int,
) -> Awaitable[None]:
    mission_name = mission_loader.split('/', 1)[0]

    async with aiohttp.ClientSession() as http:
        while True:
            partition = await partitions_queue.get()

            if partition is None:
                partitions_queue.task_done()
                return

            await process_partition(
                loop=loop,
                results_queue=results_queue,
                server_address=server_address,
                http=http,
                partition=partition,
                mission_template=mission_template,
                mission_loader=mission_loader,
                mission_name=mission_name,
                width=width,
                scale=scale,
            )
            partitions_queue.task_done()
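# A minimal usage sketch (not from the original source) of the sentinel-based
# shutdown pattern used above: the producer enqueues work items, then one None
# per worker so each consumer exits its loop. `partitions`, `worker_count` and
# the single-argument `consume` coroutine are hypothetical stand-ins.
import asyncio


async def run_workers(partitions, worker_count, consume):
    partitions_queue: asyncio.Queue = asyncio.Queue()
    for partition in partitions:
        partitions_queue.put_nowait(partition)
    for _ in range(worker_count):
        partitions_queue.put_nowait(None)  # one sentinel per worker
    workers = [asyncio.create_task(consume(partitions_queue))
               for _ in range(worker_count)]
    await partitions_queue.join()  # every item (and sentinel) marked task_done()
    await asyncio.gather(*workers)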
class MagicEmotiv:

    def __init__(self, ptr, upd_interval):
        self.data_to_send = Queue()
        self.battery = 40
        self.packets = Queue()
        self.ptr = ptr
        self.poll_interval = upd_interval

    def set_filter(self, value):
        self.poll_interval = 1 / value

    async def setup(self):
        print("creating magic emotiv...")

    async def read_data(self):
        while self.running:
            s = {}
            for shift, sensor in enumerate(sorted(sensor_bits, reverse=True)):
                s[sensor] = {'quality': 0.0}
                s[sensor]['value'] = np.random.normal() + shift * 5
            packet = MagicPacket(
                b'Py2\x18\xe7\xb7\xdf\x8e\x86n;g\xbd\xc0\x00\x00\x02\x11(!`' +
                b'=\x80\x15\xecX\xc6 \xd9ii\x9e',
                s, False)
            self.packets.put_nowait(packet)
            self.data_to_send.put_nowait(packet)
            self.ptr += 1
            await asyncio.sleep(self.poll_interval)

    async def update_console(self):
        while self.running:
            packet = await self.packets.get()
            print(packet)
            await asyncio.sleep(self.poll_interval)
def __init__( self, display_output=False, serial_number="", is_research=False, filter_hz=25, pointer=0): """ Sets up initial values. """ self.running = True self.packets = Queue() self.data_to_send = Queue() self.battery = 0 self.display_output = display_output self.poll_interval = 1 / filter_hz self.is_research = is_research self.ptr = pointer self.sensors = { 'F3': {'value': 0, 'quality': 0}, 'FC6': {'value': 0, 'quality': 0}, 'P7': {'value': 0, 'quality': 0}, 'T8': {'value': 0, 'quality': 0}, 'F7': {'value': 0, 'quality': 0}, 'F8': {'value': 0, 'quality': 0}, 'T7': {'value': 0, 'quality': 0}, 'P8': {'value': 0, 'quality': 0}, 'AF4': {'value': 0, 'quality': 0}, 'F4': {'value': 0, 'quality': 0}, 'AF3': {'value': 0, 'quality': 0}, 'O2': {'value': 0, 'quality': 0}, 'O1': {'value': 0, 'quality': 0}, 'FC5': {'value': 0, 'quality': 0}, 'X': {'value': 0, 'quality': 0}, 'Y': {'value': 0, 'quality': 0}, 'Unknown': {'value': 0, 'quality': 0} } self.serial_number = serial_number self.old_model = False
class Waiter(BaseHandler): """ The Waiter handler allows an event handler to block until a particular stanza has been received. The handler will either be given the matched stanza, or ``False`` if the waiter has timed out. :param string name: The name of the handler. :param matcher: A :class:`~slixmpp.xmlstream.matcher.base.MatcherBase` derived object for matching stanza objects. :param stream: The :class:`~slixmpp.xmlstream.xmlstream.XMLStream` instance this handler should monitor. """ def __init__(self, name, matcher, stream=None): BaseHandler.__init__(self, name, matcher, stream=stream) self._payload = Queue() def prerun(self, payload): """Store the matched stanza when received during processing. :param payload: The matched :class:`~slixmpp.xmlstream.stanzabase.ElementBase` object. """ self._payload.put_nowait(payload) def run(self, payload): """Do not process this handler during the main event loop.""" pass @asyncio.coroutine def wait(self, timeout=None): """Block an event handler while waiting for a stanza to arrive. Be aware that this will impact performance if called from a non-threaded event handler. Will return either the received stanza, or ``False`` if the waiter timed out. :param int timeout: The number of seconds to wait for the stanza to arrive. Defaults to the the stream's :class:`~slixmpp.xmlstream.xmlstream.XMLStream.response_timeout` value. """ if timeout is None: timeout = slixmpp.xmlstream.RESPONSE_TIMEOUT stanza = None try: stanza = yield from self._payload.get() except TimeoutError: log.warning("Timed out waiting for %s", self.name) self.stream().remove_handler(self.name) return stanza def check_delete(self): """Always remove waiters after use.""" return True
def test_nonmatching():
    i_queue = Queue()
    o_queue = find_events(i_queue)
    for in_string in NONMATCHING_TESTS:
        yield from i_queue.put(in_string)
        yield from o_queue.get()
def test_matching():
    i_queue = Queue()
    o_queue = find_events(i_queue)
    for in_string, event in MATCHING_TESTS:
        yield from i_queue.put(in_string)
        ev = yield from o_queue.get()
        eq_(ev, event)
class MessageHandler(ws.WS):

    def __init__(self):
        self.queue = Queue()

    def get(self):
        return self.queue.get()

    def on_message(self, websocket, message):
        return self.queue.put(message)
async def udp_writer(s: socket, oqueue: Queue) -> None:
    """Forward packets to the UDP socket."""
    while True:
        peer, data = await oqueue.get()
        try:
            s.sendto(data, peer)
        finally:
            oqueue.task_done()
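# A hedged wiring example (not from the original source) for the udp_writer
# coroutine above: packets are enqueued as (peer, data) tuples and the writer
# forwards them to a non-blocking UDP socket. The address and payload are
# placeholders.
import asyncio
import socket


async def send_datagrams() -> None:
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.setblocking(False)
    oqueue: asyncio.Queue = asyncio.Queue()
    writer = asyncio.create_task(udp_writer(sock, oqueue))
    await oqueue.put((("127.0.0.1", 9999), b"ping"))
    await oqueue.join()   # wait until the packet has been sent
    writer.cancel()       # the writer loops forever, so cancel it when done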
def new_queue():
    global _main_loop
    queue = Queue(loop=_main_loop)

    def putter(item):
        _main_loop.call_soon_threadsafe(queue.put_nowait, item)

    queue.put = putter
    return queue
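# A small usage sketch (an assumption, not part of the original module): because
# new_queue() swaps in a call_soon_threadsafe putter, a plain OS thread can feed
# items to a coroutine that awaits queue.get() on _main_loop.
import threading


def feed_from_thread(queue, items):
    def worker():
        for item in items:
            queue.put(item)  # thread-safe thanks to the patched putter
    threading.Thread(target=worker, daemon=True).start()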
class Listener:

    def __init__(self):
        self._messages = Queue()

    def __call__(self, channel, message):
        self._messages.put_nowait((channel, message))

    def get(self):
        return self._messages.get()
class Message(ws.WS):

    def __init__(self, loop):
        self.queue = Queue(loop=loop)

    def get(self):
        return self.queue.get()

    def on_message(self, websocket, message):
        self.queue.put_nowait(message)
async def call(loop, inq: asyncio.Queue):
    while True:
        v = await inq.get()
        logger.debug("consume[S] v:%s", v)
        if v is None:
            inq.task_done()
            break
        v = await afn(v)
        logger.debug("consume[E] v:%s", v)
        inq.task_done()
    await inq.join()
    logger.debug("consume[CLOSE]")
async def __call__(self, inq: asyncio.Queue):
    while True:
        v = await inq.get()
        logger.debug("aggregate[S] v:%s", v)
        if v is None:
            inq.task_done()
            break
        await asyncio.sleep(0.1, loop=self.loop)
        print(v)
        logger.debug("aggregate[E] v:%s", v)
        inq.task_done()
    await inq.join()
    logger.debug("aggregate[CLOSE]")
async def call(loop, inq: asyncio.Queue, outq: asyncio.Queue):
    while True:
        v = await inq.get()
        logger.debug("communicate[S] v:%s", v)
        if v is None:
            inq.task_done()
            break
        v = await afn(v)
        logger.debug("communicate[E] v:%s", v)
        await outq.put(v)
        inq.task_done()
    await inq.join()
    await outq.put(None)
    logger.debug("communicate[CLOSE]")
class ConnectionPool:

    def __init__(self):
        self._config_dict = None
        self._queue = Queue()
        self._outstanding_connections = WeakSet()

    async def get_conn(self):
        self._check_config()
        try:
            while True:
                conn = self._queue.get_nowait()
                if conn.is_open():
                    break
                try:
                    await conn.close()
                except Exception:
                    l.debug('Exception in close rethink connection', exc_info=True)
        except QueueEmpty:
            conn = await r.connect(**self._config_dict)
        self._outstanding_connections.add(conn)
        return conn

    async def put_conn(self, conn):
        self._queue.put_nowait(conn)
        self._outstanding_connections.remove(conn)

    def set_config(self, config):
        self._config_dict = config

    def get_config(self):
        self._check_config()
        return self._config_dict

    async def teardown(self):
        while True:
            try:
                conn = self._queue.get_nowait()
            except QueueEmpty:
                break
            self._outstanding_connections.add(conn)
        for conn in self._outstanding_connections:
            try:
                await conn.close()
            except Exception:
                l.debug('Exception in close rethink connection', exc_info=True)

    def _check_config(self):
        assert self._config_dict is not None, "Did you remember to run resync.setup()?"
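# A hedged usage sketch for the ConnectionPool above (not from the original
# source). It relies only on methods defined in the class: set_config(),
# get_conn() and put_conn(). The config values are placeholders; real keys
# depend on how rethinkdb's r.connect() is called.
async def with_connection(pool):
    pool.set_config({"host": "localhost", "port": 28015})
    conn = await pool.get_conn()
    try:
        ...  # run queries against `conn` here
    finally:
        await pool.put_conn(conn)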
async def __call__(self, inq: asyncio.Queue, outq: asyncio.Queue):
    while True:
        v = await inq.get()
        logger.debug("communicate[S] v:%s", v)
        if v is None:
            inq.task_done()
            break
        await asyncio.sleep(0.1, loop=self.loop)
        v = v * v
        logger.debug("communicate[E] v:%s", v)
        await outq.put(v)
        inq.task_done()
    await inq.join()
    await outq.put(None)
    logger.debug("communicate[CLOSE]")
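# A minimal pipeline wiring sketch (an assumption, not from the original code):
# a producer feeds a "communicate" stage like the one above through inq, which
# in turn feeds a downstream consumer through outq, using None as the
# end-of-stream sentinel exactly as the stages expect. `communicate` and
# `consume` are hypothetical stand-ins for the callables defined above.
import asyncio


async def run_pipeline(communicate, consume, values):
    inq: asyncio.Queue = asyncio.Queue()
    outq: asyncio.Queue = asyncio.Queue()
    stage = asyncio.create_task(communicate(inq, outq))
    sink = asyncio.create_task(consume(outq))
    for v in values:
        await inq.put(v)
    await inq.put(None)  # close the first stage; it forwards None downstream
    await asyncio.gather(stage, sink)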
def input(self, fd, dst):
    q = Queue()

    def cb():
        q.put_nowait(os.read(fd, 32))

    self.loop.add_reader(fd, cb)
    try:
        while True:
            data = yield from q.get()
            if not data:
                break
            yield from send(dst, BYTES, data)
    finally:
        self.loop.remove_reader(fd)
def __init__(self, basePath, max_tasks=25):
    # max concurrent tasks
    self.max_tasks = max_tasks
    # urls we have already seen
    self.processed = set()
    # base path of the url to start the crawl from; should be the root of a domain
    self.basePath = basePath
    # event loop: we do not fall back to IOCP (win32), select, or any other
    # event loop; we only use the event loop provided by asyncio
    self.loop = asyncio.get_event_loop()
    # create our session, which encapsulates a connection pool
    self.session = aiohttp.ClientSession(loop=self.loop)
    # get Queue
    self.queue = Queue(loop=self.loop)
    # first url
    self.queue.put_nowait(self.basePath)
    # JSON for visualization
    self.data = []
def __init__(self, roots, exclude=None, strict=True, # What to crawl. max_redirect=10, max_tries=4, # Per-url limits. max_tasks=10, *, loop=None): self.loop = loop or asyncio.get_event_loop() self.roots = roots self.exclude = exclude self.strict = strict self.max_redirect = max_redirect self.max_tries = max_tries self.max_tasks = max_tasks self.q = Queue(loop=self.loop) self.seen_urls = BloomFilter(10000000, 0.01) self.done = [] self.session = aiohttp.ClientSession(loop=self.loop) self.root_domains = set() for root in roots: parts = urllib.parse.urlparse(root) host, port = urllib.parse.splitport(parts.netloc) if not host: continue if re.match(r'\A[\d\.]*\Z', host): self.root_domains.add(host) else: host = host.lower() if self.strict: self.root_domains.add(host) else: self.root_domains.add(lenient_host(host)) for root in roots: self.add_url(root) self.t0 = time.time() self.t1 = None
def __init__(self, pubnub_instance): subscription_manager = self self._message_worker = None self._message_queue = Queue() self._subscription_lock = Semaphore(1) self._subscribe_loop_task = None self._heartbeat_periodic_callback = None self._reconnection_manager = AsyncioReconnectionManager(pubnub_instance) super(AsyncioSubscriptionManager, self).__init__(pubnub_instance) self._start_worker() class AsyncioReconnectionCallback(ReconnectionCallback): def on_reconnect(self): subscription_manager.reconnect() pn_status = PNStatus() pn_status.category = PNStatusCategory.PNReconnectedCategory pn_status.error = False subscription_manager._subscription_status_announced = True subscription_manager._listener_manager.announce_status(pn_status) self._reconnection_listener = AsyncioReconnectionCallback() self._reconnection_manager.set_reconnection_listener(self._reconnection_listener)
class Domain(object):

    def __init__(self, domain):
        self.domain = domain
        self.requests = Queue()
        loop = asyncio.get_event_loop()
        self.fetcher = loop.create_task(self.fetch_loop())

    async def fetch_loop(self):
        print('Domain({}): Fetch loop started'.format(self.domain))
        while True:
            addr, future = await self.requests.get()
            # Start an asynchronous sleep of crawl delay in length
            print('Domain({}): Fetching {}'.format(self.domain, addr))
            await asyncio.sleep(1)
            # Wait until the async sleep with crawl delay is complete
            future.set_result(42)

    async def get(self, addr):
        future = asyncio.get_event_loop().create_future()
        await self.requests.put((addr, future))
        print('Q:', self.requests.qsize())
        print('Domain({}): Queued {}'.format(self.domain, addr))
        return future

    async def close(self):
        # Cancelling only starts the process to cancel
        self.fetcher.cancel()
        # We must wait for the task itself to complete
        await self.fetcher
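# A hedged usage sketch (not part of the original snippet) for the Domain class
# above: get() enqueues a request and returns a future that resolves once the
# fetch loop has processed it. The domain and paths below are placeholders.
import asyncio


async def crawl_domain():
    domain = Domain("example.com")
    futures = [await domain.get(addr) for addr in ("/a", "/b")]
    results = await asyncio.gather(*futures)
    try:
        await domain.close()           # close() awaits the cancelled fetch task,
    except asyncio.CancelledError:     # which re-raises CancelledError here
        pass
    return results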
def __init__(self):
    self._init_states()
    self.remote_address = None
    self.remote_port = None
    self.client_id = None
    self.clean_session = None
    self.will_flag = False
    self.will_message = None
    self.will_qos = None
    self.will_retain = None
    self.will_topic = None
    self.keep_alive = 0
    self.publish_retry_delay = 0
    self.broker_uri = None
    self.username = None
    self.password = None
    self.cafile = None
    self.capath = None
    self.cadata = None
    self._packet_id = 0
    self.parent = 0

    # Used to store outgoing ApplicationMessage while publish protocol flows
    self.inflight_out = OrderedDict()

    # Used to store incoming ApplicationMessage while publish protocol flows
    self.inflight_in = OrderedDict()

    # Stores messages retained for this session
    self.retained_messages = Queue()

    # Stores PUBLISH messages ID received in order and ready for application process
    self.delivered_message_queue = Queue()
def __init__(self, creator, pool_size=10, loop=None, timeout=None, **kw):
    self._creator = creator
    self._closed = False
    self._timeout = timeout
    self._queue = Queue(maxsize=pool_size, loop=loop)
    self._connecting = 0
    self._loop = self._queue._loop
    self._in_use_connections = set()
class AverageMessageHandlerForTest(AverageMemoryMessageHandler):

    def __init__(self, keys, average_period_minutes=0):
        super().__init__(keys, average_period_minutes)
        self.queue = Queue()

    @asyncio.coroutine
    def save(self, average_message):
        yield from self.queue.put(average_message)
async def process_results_queue(
    results_queue: asyncio.Queue,
    total_points: int,
    output_file_path: Path,
) -> Awaitable[None]:
    point_size = calcsize(HEIGHT_PACK_FORMAT)
    output_size = point_size * total_points
    natural_size = humanize.naturalsize(
        output_size, binary=True, format='%.3f',
    )
    LOG.debug(f"output size: {natural_size}")

    processed_points = 0
    output_file_path.parent.parent.mkdir(parents=True, exist_ok=True)

    with output_file_path.open('wb') as f:
        f.truncate(output_size)

        while True:
            data = await results_queue.get()

            if not data:
                results_queue.task_done()
                return

            partition, values = data
            start = partition.start * point_size
            processed_points += (partition.end - partition.start) + 1
            progress = (processed_points / total_points) * 100

            LOG.debug(
                f"gather results for range "
                f"[{partition.start}:{partition.end}], "
                f"progress: {progress:.2f}%"
            )

            f.seek(start)
            f.write(values)
            results_queue.task_done()
def __init__(self, root):
    self.visited_urls = []
    self.root = self.add_scheme(root)
    if len(self.root.host) < 4:
        sys.exit('invalid target {}'.format(self.root.host))
    self.target_path = '/opt/snare/pages/{}'.format(self.root.host)
    if not os.path.exists(self.target_path):
        os.mkdir(self.target_path)
    self.new_urls = Queue()
def __init__(self, root_url, max_redirect):
    self.max_tasks = 10
    self.max_redirect = max_redirect
    self.q = Queue()
    self.seen_urls = set()
    # aiohttp's ClientSession does connection pooling and
    # HTTP keep-alives for us.
    self.session = aiohttp.ClientSession(loop=loop)
    # Put (URL, max_redirect) in the Queue; put_nowait() is used because
    # Queue.put() is a coroutine and cannot be awaited inside __init__.
    self.q.put_nowait((root_url, self.max_redirect))
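# A hedged sketch (an assumption, not the project's actual worker) of the
# consumer side matching the __init__ above: workers pull (url, max_redirect)
# tuples off self.q, fetch them, and call task_done() so q.join() can
# eventually return. It is meant to live on the same class.
async def work(self):
    while True:
        url, max_redirect = await self.q.get()
        try:
            async with self.session.get(url, allow_redirects=False) as response:
                await response.read()
        finally:
            self.q.task_done()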
class Echo(WS):

    def __init__(self, loop=None):
        self.queue = Queue(loop=loop)

    def get(self):
        return self.queue.get()

    def on_message(self, ws, message):
        self.queue.put_nowait(message)

    def on_ping(self, ws, body):
        ws.pong(body)
        self.queue.put_nowait('PING: %s' % body.decode('utf-8'))

    def on_pong(self, ws, body):
        self.queue.put_nowait('PONG: %s' % body.decode('utf-8'))

    def on_close(self, ws):
        self.queue.put_nowait('CLOSE')
def __init__(self, root, max_depth, css_validate):
    self.visited_urls = []
    self.root, self.error_page = self.add_scheme(root)
    self.max_depth = max_depth
    self.moved_root = None
    if len(self.root.host) < 4:
        sys.exit('invalid target {}'.format(self.root.host))
    self.target_path = '/opt/snare/pages/{}'.format(self.root.host)
    if not os.path.exists(self.target_path):
        os.mkdir(self.target_path)
    self.css_validate = css_validate
    self.new_urls = Queue()
    self.meta = {}
    self.logger = logging.getLogger(__name__)
async def backend_loop(forward_queue: asyncio.Queue, backend_queue: asyncio.Queue): """ :description: Main Coroutine for the backend. Responsible for calling stalker functions. :param forward_queue: The Queue which is handled by the forwarder of the main event loop. :type forward_queue: asyncio.Queue :param backend_queue: The Queue that is handled by this Coroutine. :type backend_queue: asyncio.Queue :return: None :rtype: None """ while True: query = await backend_queue.get() # check next_step to perform if query.next_step == "stalk": stalker = determine_stalker(query) logger.debug(f"Using stalker function {stalker.__name__}") if stalker is None: # if stalker is none an error has occurred during stalker determination and saved to query forward_queue.put_nowait(query) backend_queue.task_done() continue # check flags ranks = False if len(with_ranks_flag_lookup.intersection(query.flags)) >= 1: ranks = True # extra case for prime league season with ranks # in this case a message should inform the user that this might take a moment if stalker is prime_league.stalk_prime_league_season and ranks: extra_message = Message( "Running prime league season stalk with ranks might take a while, also output " "only as file." ) extra_query = Query( query.context_type, "frontend", "format", discord_channel=query.discord_channel, payload=extra_message, ) forward_queue.put_nowait(extra_query) query.update_query(query.forward_to, query.next_step, flags="file") logger.debug(f"Starting stalk for query: {query.raw_command}") try: payload = await stalker(query.data) # TODO add better error handling based on exception raised except Exception as e: error_message = ( f"While stalking a {type(e)} occurred. Original query: {query}" ) logger.exception(error_message) create_error(query, error_message) forward_queue.put_nowait(query) backend_queue.task_done() continue logger.debug(f"Finished stalking for query: {query.raw_command}") # rank stalking is the slowest part and for mid to large sized tournaments it takes some time if isinstance(payload, TeamList): if len(payload.teams) > 30 and ranks: extra_message = Message( f"Rank stalk for {len(payload.teams)} teams might take a moment, please " f"wait." ) extra_query = Query( query.context_type, "frontend", "format", discord_channel=query.discord_channel, payload=extra_message, ) forward_queue.put_nowait(extra_query) if ranks: logger.debug(f"Starting rank stalk for query: {query.raw_command}") # try loading a Riot Api Token found_riot_token = False try: token_loader.load_token("RiotToken") found_riot_token = True except TokenLoadingError as e: logger.info("Failed to load RiotToken, using op.gg instead.") if ( found_riot_token and len(query.flags.intersection(dont_use_api_flag_lookup)) == 0 ): await call_rank_stalker(payload, use_api=True) query.flags.add(*used_riot_api_flag_lookup) else: await call_rank_stalker(payload) logger.debug(f"Finished rank stalk for query: {query.raw_command}") # adds data to db # TODO don't forget to add data query.update_query("frontend", "format", payload=payload) # forward query forward_queue.put_nowait(query) # new backend tasks should be added here else: logger.error( f"Invalid control flow in backend master for query {str(query)}, discarding query." ) del query backend_queue.task_done()
async def listen_for_order_book_diffs(self, ev_loop: asyncio.BaseEventLoop, output: asyncio.Queue): while True: try: trading_pairs_full_list: List[ str] = await self.get_trading_pairs() trading_pairs_partial_lists: List[List[str]] = [ trading_pairs_full_list[ m:m + IDEX_WS_TRADING_PAIRS_SUBSCRIPTION_LIMIT] for m in range(0, len(trading_pairs_full_list), IDEX_WS_TRADING_PAIRS_SUBSCRIPTION_LIMIT) ] for trading_pairs in trading_pairs_partial_lists: async with websockets.connect(IDEX_WS_URL) as ws: ws: websockets.WebSocketClientProtocol = ws await self._send_handshake(ws) async for raw_message in self._inner_messages(ws): decoded: Dict[str, Any] = ujson.loads(raw_message) request: str = decoded.get("request") diff_messages: List[Dict[str, Any]] = [] # after response from handshake, send subscribe message if request == "handshake": await self._send_subscribe( ws, trading_pairs, decoded) continue event: str = decoded.get("event") payload: Dict[str, Any] = ujson.loads( decoded["payload"] ) # payload is stringified json if event == "market_orders": orders: List[str, Any] = payload["orders"] market: str = payload["market"] diff_messages = [{ **o, "event": event, "market": market } for o in orders] elif event == "market_cancels": cancels: List[str, Any] = payload["cancels"] market: str = payload["market"] diff_messages = [{ **c, "event": event, "market": market } for c in cancels] elif event == "market_trades": trades: List[str, Any] = payload["trades"] diff_messages = [{ **t, "event": event } for t in trades] else: # ignore message if event is not recognized continue for diff_message in diff_messages: ob_message: IDEXOrderBookMessage = self.order_book_class.diff_message_from_exchange( diff_message) output.put_nowait(ob_message) except asyncio.CancelledError: raise except Exception: self.logger().network( f"Error getting order book diff messages.", exc_info=True, app_warning_msg= f"Error getting order book diff messages. Check network connection." ) await asyncio.sleep(30.0)
def __init__(self, message_queue: Queue = None) -> None:
    super(QueueOutputChannel, self).__init__()
    self.messages = Queue() if not message_queue else message_queue
class Crawler: def __init__(self, domain, max_redirects=10, max_retries=3, max_tasks=10): self.domain = domain self.max_redirects = max_redirects self.max_tasks = max_tasks self.max_retries = max_retries # self.loop = loop or asyncio.get_event_loop() self.q = Queue() self.urls_seen = set() self.session = aiohttp.ClientSession() self.add_url('/') @asyncio.coroutine def crawl(self): workers = [asyncio.Task(self.work()) for _ in range(self.max_tasks)] yield from self.q.join() for worker in workers: worker.cancel() def close(self): self.session.close() @asyncio.coroutine def work(self): try: while True: url, max_redirects = yield from self.q.get() # LOGGER.debug('fetching {}'.format(url)) yield from self.fetch(url, max_redirects) self.q.task_done() except asyncio.CancelledError: pass @asyncio.coroutine def fetch(self, url, max_redirects): retry = 0 while retry < self.max_retries: try: response = yield from self.session.get(self.domain+url, allow_redirects=False) LOGGER.debug('fetched {}'.format(url)) break except aiohttp.ClientError as client_err: retry += 1 LOGGER.info('fetching {} failed {} times with error {}'.format(url, retry, client_err)) except Exception as e: LOGGER.error('fetching {} with error: {}'.format(url, e)) return else: LOGGER.error('fetching {} out of max retry times'.format(url)) return if self.is_redirect(response): location = response.headers['location'] next_url = urllib.parse.urlparse(location).path next_url = urllib.parse.urljoin(url, next_url) if next_url in self.urls_seen: pass elif max_redirects > 0: self.add_url(next_url, max_redirects-1) LOGGER.info('redirect from {} to {}'.format(url, next_url)) else: LOGGER.error('redirect from {} to {} out of times'.format(url, next_url)) else: links = yield from self.parse_links(response) LOGGER.debug('parsed {} links from {}'.format(len(links), url)) for link in links.difference(self.urls_seen): self.q.put_nowait((link, self.max_redirects)) self.urls_seen.update(links) yield from response.release() def add_url(self, url, max_redirects=None): max_redi = max_redirects or self.max_redirects self.urls_seen.add(url) self.q.put_nowait((url, max_redi)) def is_redirect(self, response): return response.status in (300, 301, 302, 303, 307) @asyncio.coroutine def parse_links(self, response): links = set() if response.status == 200: content_type = response.headers.get('content-type', '') if content_type and content_type.startswith('text/html'): text = yield from response.text() urls = set(re.findall(r'''(?i)href=["']([^\s"'<>]+)''', text)) if urls: LOGGER.info('got {} distinct urls from {}'.format( len(urls), response.url)) for url in urls: norm_url = urllib.parse.urljoin(response.url, url) url_parts = urllib.parse.urlparse(norm_url) if url_parts.scheme not in ('http', 'https', ''): continue host, port = urllib.parse.splitport(url_parts.netloc) host = host.lower() host = host[4:] if host.startswith('www.') else host if host and not host in self.domain: continue defragmented, frag = urllib.parse.urldefrag(url_parts.path) links.add(defragmented) return links
def __init__(self, auto_start=True) -> None:
    self.queue = Queue()
    self.loop_on: asyncio.Task = None
    if auto_start:
        self.start()
class CrawlerBase: def __init__(self, max_retries=3, loop=None, max_tasks=3, max_redirect=1): self.max_retries = max_retries self.max_tasks = max_tasks self.max_redirect = max_redirect self.seen_urls = set() self.loop = loop or asyncio.get_event_loop() self.q = Queue(loop=self.loop) self.session = aiohttp.ClientSession(loop=self.loop) async def fetch(self, url, **kwargs): retries = 0 logging.info("Preparing for fetching html {}".format(url)) while retries < self.max_retries: try: resp = await self.session.get(url, **kwargs) # self.parse_resp(await self.session.get(url, **kwargs)) if retries > 1: logging.info('try {} for {} success'.format(retries, url)) return await resp.text() # break except aiohttp.client_exceptions as e: print(e) retries += 1 def parse_resp(self, resp): raise NotImplementedError def close(self): self.session.close() async def work(self): try: while True: url, max_redirect = await self.q.get() assert url in self.seen_urls resp = await self.fetch(url) self.parse_resp(resp) await self.sleep() self.q.task_done() except asyncio.CancelledError: pass @staticmethod def url_check(url): parts = urllib.parse.urlparse(url) if parts.scheme not in ('http', 'https'): logging.debug('skipping non-http scheme in {}'.format(url)) return False return True def add_url(self, url, max_redirect=None): if max_redirect is None: max_redirect = self.max_redirect logging.info('adding {} {}'.format(url, max_redirect)) if self.url_check(url): self.seen_urls.add(url) self.q.put_nowait((url, max_redirect)) else: logging.info('adding {} {} failed'.format(url, max_redirect)) def add_url_from_file(self, file_path, max_redirect=None): if max_redirect is None: max_redirect = self.max_redirect with open(file_path, "r") as _f: lines = _f.readlines() for url in lines: logging.info('adding {} {}'.format(url, max_redirect)) if self.url_check(url): self.seen_urls.add(url) self.q.put_nowait((url, max_redirect)) else: logging.info('adding {} {} failed'.format(url, max_redirect)) async def run_work(self): workers = [asyncio.Task(self.work(), loop=self.loop) for _ in range(self.max_tasks)] await self.q.join() for w in workers: w.cancel() @staticmethod async def sleep(): await asyncio.sleep(1)
def __init__(self, port):
    self._active_connections = set()
    self.port = port
    self.server = None
    self.queue = Queue()
async def __worker(self, name: str, queue: asyncio.Queue) -> NoReturn: self.logger.debug('Worker [%s] initializing...', name) while not self.__done: try: # Allow other pending co-routines to run # await asyncio.sleep(0.0) item = queue.get_nowait() except asyncio.QueueEmpty: await asyncio.sleep(0.01) continue session = self.__dbm.openSession() try: # Unpack the queued request sir_id = item['key'] spot_instance_request = item['value'] instance = item['instance'] ec2_conn = item['ec2_conn'] max_register_duration = item['max_register_duration'] node = None # Attempt to fetch the node matching the instance in the spot request. if instance: if 'id' in instance: try: node = self.__get_node_by_instance( session, instance['instance']) except Exception as ex: # pylint: disable=broad-except self.logger.debug('Unable to fetch node: %s', ex) self.logger.info( 'Worker [%s] processing spot instance request id [%s]', name, sir_id, ) with await self.__bad_request_lock: if sir_id in self.__bad_requests: self.logger.warning( 'Ignoring invalid spot instance request: [%s]', sir_id, ) continue try: await self.process_spot_instance_request( ec2_conn, session, node, spot_instance_request, max_register_duration, ) except Exception: # pylint: disable=broad-except self.logger.exception( 'Error processing spot instance request [%s]', spot_instance_request, ) finally: session.close() queue.task_done() self.logger.debug('Exiting worker')
def __init__( self, player_configuration: Optional[PlayerConfiguration] = None, *, avatar: Optional[int] = None, battle_format: str = "gen8randombattle", log_level: Optional[int] = None, max_concurrent_battles: int = 1, server_configuration: Optional[ServerConfiguration] = None, start_timer_on_battle_start: bool = False, start_listening: bool = True, team: Optional[Union[str, Teambuilder]] = None, ) -> None: """ :param player_configuration: Player configuration. If empty, defaults to an automatically generated username with no password. This option must be set if the server configuration requires authentication. :type player_configuration: PlayerConfiguration, optional :param avatar: Player avatar id. Optional. :type avatar: int, optional :param battle_format: Name of the battle format this player plays. Defaults to gen8randombattle. :type battle_format: str :param log_level: The player's logger level. :type log_level: int. Defaults to logging's default level. :param max_concurrent_battles: Maximum number of battles this player will play concurrently. If 0, no limit will be applied. Defaults to 1. :type max_concurrent_battles: int :param server_configuration: Server configuration. Defaults to Localhost Server Configuration. :type server_configuration: ServerConfiguration, optional :param start_listening: Whether to start listening to the server. Defaults to True. :type start_listening: bool :param start_timer_on_battle_start: Whether to automatically start the battle timer on battle start. Defaults to False. :type start_timer_on_battle_start: bool :param team: The team to use for formats requiring a team. Can be a showdown team string, a showdown packed team string, of a ShowdownTeam object. Defaults to None. :type team: str or Teambuilder, optional """ if player_configuration is None: player_configuration = _create_player_configuration_from_player(self) if server_configuration is None: server_configuration = LocalhostServerConfiguration super(Player, self).__init__( player_configuration=player_configuration, avatar=avatar, log_level=log_level, server_configuration=server_configuration, start_listening=start_listening, ) self._format: str = battle_format self._max_concurrent_battles: int = max_concurrent_battles self._start_timer_on_battle_start: bool = start_timer_on_battle_start self._battles: Dict[str, AbstractBattle] = {} self._battle_semaphore: Semaphore = Semaphore(0) self._battle_start_condition: Condition = Condition() self._battle_count_queue: Queue = Queue(max_concurrent_battles) self._battle_end_condition: Condition = Condition() self._challenge_queue: Queue = Queue() if isinstance(team, Teambuilder): self._team = team elif isinstance(team, str): self._team = ConstantTeambuilder(team) else: self._team = None self.logger.debug("Player initialisation finished")
class Crawler: def __init__(self, baseUrl, dbCfg, listUrlInfo, picPath='./pic1', pageRows=10, maxCoroNum=20, tcpConnNum=20, maxMonitorCount=10): self.baseUrl = baseUrl.strip('/') + "/" self.picPath = picPath # 下载图片到本地的目录 self.queue = Queue(maxsize=50) # 队列,用于放要爬取的url,长度为500 self.dbCfg = dbCfg self.sem = asyncio.Semaphore(maxCoroNum) # 使用信号量限制并发的协程的最大数量 self.tcpConnNum = tcpConnNum # tcp连接池中最大并发连接数 self.session = None # 存放会话连接池的session对象 self.dbPool = None # Mysql连接池 self.listUrlInfo = listUrlInfo self.pageRows = pageRows # 每页列表页有多少篇文章 self.crawledUrl = set() # 用于去重的url集合 self.loop = None # 事件循环对象 self.lockForSql = asyncio.Lock() # 用于同步多个数据入库的协程的锁 self.monitorCount = 0 # 任务监控计数器 self.maxMonitorCount = maxMonitorCount # 开启事件循环(启动协程) def startLoop(self): self.loop = asyncio.get_event_loop() asyncio.ensure_future(self.start( )) # 这里用create_task()会报错,但是ensure_future()不会,两个方法都可以将协程放到事件循环中运行. self.loop.run_forever() # 必须使用run_forever()不能用run_until_complete() # self.loop.run_until_complete(self.start()) # 将主协程放入事件循环的任务列表中开始运行主携程,这行代码会等待 # self.loop.run_until_complete(asyncio.sleep(0.25)) # 所有协程结束运行后,睡个0.25秒,让所有tcp连接,mysql连接都断开后才结束整个线程以免报警告 # 主协程 async def start(self): # 在开始爬取之前,先创建mysql连接池 self.pool = await aiomysql.create_pool(loop=self.loop, maxsize=100, minsize=100, pool_recycle=100, **self.dbCfg) # 开启 produceListUrl 和 consume 这两个协程,将他们加入到任务列表中开始执行 asyncio.create_task(self.produceListUrl()) asyncio.create_task(self.consume()) # 开启一个monitor协程用于所有任务完成的时候停止事件循环 # asyncio.create_task(self.monitor()) # 用于监控任务队列 self.queue 是否长时间没有任务, 如果检查到self.queue为空的次数超过self.maxMonitorCount规定的次数则停止事件循环,结束整个线程 async def monitor(self): while True: if self.queue.qsize() == 0: self.monitorCount += 1 else: self.monitorCount = 0 if self.monitorCount >= self.maxMonitorCount: print("所有任务已完成, 事件循环停止") self.pool.close() # 先关闭mysql连接池 await self.session.close() # 先关闭tcp连接池 self.loop.stop() # 然后才关闭事件循环 # 每0.5秒检测一次任务队列 await asyncio.sleep(0.5) # 生成列表页url(是api接口) async def produceListUrl(self): for listName, listInfo in self.listUrlInfo.items(): for page in range(int(listInfo['page'])): listUrl = self.baseUrl + "artlist.php?tid=" + str( listInfo['tid']) + "&start=" + str( int(page) * self.pageRows) # 将listUrl放到要爬取的url队列中,这个过程可能发生等待 await self.queue.put({"url": listUrl, "tname": listName}) # 消费者,用于从url队列中取出url进行爬取 async def consume(self): # 在开始爬取之前,先创建tcp连接池 connector = aiohttp.TCPConnector(ssl=False, limit=self.tcpConnNum) async with aiohttp.ClientSession(connector=connector) as self.session: while True: # 不停的从self.queue中取出task任务,task任务是我自己封装的一个字典,包含要爬取的url和其他属性 task = await self.queue.get( ) # 从自定义的asyncio队列中取出任务,这个操作可能发生等待(当队列中没有任务时,get方法会等待) self.sem.acquire( ) # 信号量限制并发协程的个数,当并发的coro协程数量超过 maxCoroNum的时候,此行代码会发生等待而切换协程 print("取出任务:" + str(task)) if task['url'].find('images') > 0 or task['url'].find( 'img') > 0: # 说明该任务是爬取图片 coro = self.crawlPicture(task) elif task['url'].find('view') > 0: # 该url是详情页页url coro = self.crawlDetailUrl(task) self.crawledUrl.add(task['url']) # 将这个详情页url设为已爬取过的url else: coro = self.crawlListUrl(task) # 上面这几句只是生成一个协程对象而已,下面这句才是将协程放到任务队列中并发运行 asyncio.create_task(coro) # 爬取单个url, type为1返回utf-8的编码格式,2是返回json格式,3是返回二进制格式 async def getUrl(self, url, type=1): # async with self.sem: # 信号量限制getUrl协程的个数,当并发的getUrl协程数量超过 maxCoroNum的时候,此行代码会发生等待而切换协程 try: async with self.session.get(url) as resp: # 请求url,返回resp对象内含响应内容 if resp.status in [200, 201]: if type == 1: responseBody = await resp.text() elif type == 2: # 
这里有个小坑,resp.json()方法有一个参数content_type默认为'application/json',如果爬取到的页面的content-type和json方法的content_type参数不同就会报错(详情可以查看resp.json源码) # 而我要爬的列表页的响应头的content-type是"text/html", 所以这里应该将content_type参数设置为None,即不校验 content-type responseBody = await resp.json(content_type=None, encoding='utf-8') else: responseBody = await resp.read() print("成功爬取 " + url) return responseBody else: print("响应码错误:%s | %s" % (str(resp.status), resp.url)) return None except BaseException as e: print("发生错误:", e) return None # 爬取列表页url,获取其中的详情页url async def crawlListUrl(self, task): # try: listUrl, tname = task['url'], task['tname'] json_data = await self.getUrl(listUrl, type=2) # 返回的是json字典,里面包含多个详情页url的id for data in json_data: detailUrl = self.__joinUrl(data['id'], tname) # 根据详情页id得到详情页url if task['url'] in self.crawledUrl: # 判断详情页url是否已经爬取过 continue else: await self.queue.put({"url": detailUrl}) # 将详情页url放入队列待爬取 self.sem.release() # except BaseException as e: # print(task) # print("出现错误: " , e) # 爬取详情页url async def crawlDetailUrl(self, task): try: html = await self.getUrl(task['url']) # 解析html中的内容,返回data字典和imgSrc列表 data, imgSrcs = self.parseHtml(html, task['url']) # 纯cpu操作,没有io,无需await # 执行数据入库,将数据入库的协程放入任务列表中并发运行, 下面我用了2种写法, 两种都行,后者会边爬边入库,前者会一直爬爬了很多很多url后才开始入库 asyncio.create_task(self.insertDb( data)) # 把 insertDb 放到事件循环并发执行,create_task方法不会发生任何等待 # await self.insertDb(data) # 让crawlDetailUrl协程 等待 insertDb 子协程运行完 # 将图片放入到url队列中,让负责下载图片的那个协程从队列取出url下载图片 # (await self.queue.put({ "url": src, "fp":localPath}) for localPath, src in imgSrcs.items()) # 傻呀,用什么生成器表达式呀,生成器表达式定义的时候,里面的代码不会执行的! [ await self.queue.put({ "url": src, "fp": localPath }) for localPath, src in imgSrcs.items() ] except BaseException as e: print("爬取详情页发生错误: ", e) finally: self.sem.release() # 爬取详情页中的图片 async def crawlPicture(self, task): print("爬取图片") print(task) src, localPath = task['url'], task['fp'] localPathList = localPath.split('/') localPathList.pop() dirPath = '/'.join(localPathList) if not os.path.exists(dirPath): os.makedirs(dirPath) # 下载图片 blob = await self.getUrl(src, type=3) # 将图片异步写入到本地文件 try: async with aiofiles.open(localPath, mode='wb') as f: await f.write(blob) except BaseException as e: print("图片写入本地错误:", e) finally: self.sem.release() f.close() # 数据入库 async def insertDb(self, data): try: async with self.pool.acquire() as conn: # 从mysql连接池获取一个连接 cursor = await conn.cursor() # 获取光标 # async with conn.cursor() as cursor: # 获取光标 data['id'] = None content = data.pop("content") # 将content字段从字典中取出 fieldNameStr = '`,`'.join(data.keys()) # 拼接字段名 fieldNameStr = '(`' + fieldNameStr + "`)" fieldValueStr = '%s,' * len(data) fieldValueStr = "(" + fieldValueStr.strip(",") + ")" sqlArt = "insert ignore into article " + fieldNameStr + " values " + fieldValueStr sqlArtContent = 'insert into art_content (`aid`, `content`) values (%s, %s)' # 数据插入文章表 和 文章详情表,由于需要获取插入到文章表的id作为下次插入文章详情表的aid,所以需要把这两条插入语句作为1个原子操作 # 可以通过加锁的方式把这两句插入语句的执行作为原子操作 async with self.lockForSql: try: await cursor.execute(sqlArt, tuple(data.values())) #数据插入文章表 await cursor.execute(sqlArtContent, (cursor.lastrowid, content) ) # 文章内容插入文章详情表 print("数据入库成功: %s" % data['url']) except BaseException as e: print("数据插入报错") print(e) finally: # 关闭cursor await cursor.close() except BaseException as e: print("mysql 连接错误:", e) print("mysql连接错误对应url %s" % data['url']) # 拼接详情页url def __joinUrl(self, article_id, cate_name): return self.baseUrl.strip("/") + "/" + cate_name + '/' + 'view/' + str( article_id) + ".html" # 解析详情页html内容 def parseHtml(self, html, detailUrl): soup = BeautifulSoup(html, 
"html.parser") data = {} data['title'] = soup.find('h1').get_text() artInfo = soup.find('div', class_="artinfo").find_all( "span") # 文章的发布时间,观看数和来源 data['time'] = self.__strtotime(artInfo[0].get_text()) data['view'] = int(artInfo[1].get_text().strip('')) data['source'] = artInfo[2].get_text().replace( "来源:", '') if len(artInfo) > 2 else '' data['url'] = detailUrl data['tid'] = str([ val['tid'] for key, val in self.listUrlInfo.items() if key in detailUrl ][0]) # content字段单独处理 content = str(soup.find('div', class_='article')) data['content'], imgSrcs = self.handleContent( content ) # 该方法会将content中的图片地址获取,并将content中的<img>标签替换为图片下载后的本地路径(但是不会执行下载图片) return data, imgSrcs # 处理文章Content,搜集文章内容中的图片并返回 def handleContent(self, content): # 用正则将content中的img标签都提取出来 pattern = re.compile('<img.*?src=["\'](.*?)["\'].*?>', re.DOTALL) # 将内容标签变为字符串类型 contentStr = str(content) # 正则替换contentStr中的图片src为下载到本地后的图片地址。并将远程图片的Src记录下来放到队列中待爬 imgSrcs = {} # 存放远程图片Src用于之后爬取图片 regRes = pattern.findall(contentStr) for src in regRes: localPath = self.renamePic(src, self.picPath) imgSrcs[localPath] = src contentStr = contentStr.replace(src, localPath) # contentStr = pattern.sub(lambda x: (imgSrcs.append(x.group(1)), x.group(0).replace(x.group(1), self.renamePic(x.group(1))))[-1],contentStr) return contentStr, imgSrcs # 替换文章Src def renamePic(self, picUrl, dirPath): dirPath = dirPath.strip('/') + '/' # 存放下载图片的目录路径 m = hashlib.md5() m.update(picUrl.encode()) fn = m.hexdigest() fn = fn[8:24] + '.jpg' return dirPath + fn def __strtotime(self, strTime): # 先转换为时间数组 timeArray = time.strptime(strTime, "%Y-%m-%d %H:%M:%S") # 转换为时间戳 timeStamp = int(time.mktime(timeArray)) return timeStamp
class SubscribeListener(SubscribeCallback): def __init__(self): self.connected = False self.connected_event = Event() self.disconnected_event = Event() self.presence_queue = Queue() self.message_queue = Queue() self.error_queue = Queue() def status(self, pubnub, status): if utils.is_subscribed_event( status) and not self.connected_event.is_set(): self.connected_event.set() elif utils.is_unsubscribed_event( status) and not self.disconnected_event.is_set(): self.disconnected_event.set() elif status.is_error(): self.error_queue.put_nowait(status.error_data.exception) def message(self, pubnub, message): self.message_queue.put_nowait(message) def presence(self, pubnub, presence): self.presence_queue.put_nowait(presence) async def _wait_for(self, coro): scc_task = asyncio.ensure_future(coro) err_task = asyncio.ensure_future(self.error_queue.get()) await asyncio.wait([scc_task, err_task], return_when=asyncio.FIRST_COMPLETED) if err_task.done() and not scc_task.done(): if not scc_task.cancelled(): scc_task.cancel() raise err_task.result() else: if not err_task.cancelled(): err_task.cancel() return scc_task.result() async def wait_for_connect(self): if not self.connected_event.is_set(): await self._wait_for(self.connected_event.wait()) else: raise Exception("instance is already connected") async def wait_for_disconnect(self): if not self.disconnected_event.is_set(): await self._wait_for(self.disconnected_event.wait()) else: raise Exception("instance is already disconnected") async def wait_for_message_on(self, *channel_names): channel_names = list(channel_names) while True: try: env = await self._wait_for(self.message_queue.get()) if env.channel in channel_names: return env else: continue finally: self.message_queue.task_done() async def wait_for_presence_on(self, *channel_names): channel_names = list(channel_names) while True: try: env = await self._wait_for(self.presence_queue.get()) if env.channel in channel_names: return env else: continue finally: self.presence_queue.task_done()
import os
import sys
import fcntl
import subprocess
import asyncio
from asyncio import Queue

sys.path.append("./")
from sysutils.asynchronous.broker.rabbitmq.rabbitmq_producer \
    import connect_params, TrivialRabbitProducer
from ahome import settings

queue = Queue()

# CELERY_WORKER_COMMAND = ['celery', 'worker', '-A', 'ahome', '-l', 'debug']
CELERY_WORKER_COMMAND = settings.CELERY_WORKER_COMMAND


def turn_to_unblocking(fd):
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)


async def process_line(line, name="stdout"):
    if line:
        line = line[:-1]
        print(str(line, encoding='utf-8'), sep="")
        await queue.put({'module': "CELERY.OUT", name: line})
def __init__(self):
    self._lastUseTime = time.time()
    self._queue = Queue()  # queue of coroutines
async def drain_queue_async(cls, q: asyncio.Queue,
                            timeout: Optional[int] = None) -> Tuple[bool, list]:
    try:
        elem = await asyncio.wait_for(q.get(), timeout=timeout) if timeout else await q.get()
        return cls.__handle_queue_update(q, elem)
    except asyncio.TimeoutError:
        return False, []
def __init__(self):
    self.mailbox = Queue()
    self.loop_id = 0
    self.reentrant_id = 0
    self.last_message_time = time.time()
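# A hedged sketch of how a mailbox like the one above is usually drained
# (an assumption, not the project's code): a receive loop awaits messages and
# dispatches them one at a time. `handle` is a hypothetical dispatcher method.
async def receive_loop(self):
    while True:
        message = await self.mailbox.get()
        self.last_message_time = time.time()
        await self.handle(message)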
async def test_no_job_is_polled(self, job_poller: JobPoller, queue: asyncio.Queue):
    await job_poller.poll_once()
    assert queue.empty()
def initialize(self):
    self.messages = Queue()
    self.publish()
import asyncio
import logging
import sys
import time
from asyncio import Queue

from dbp_files import *
from elasticsearch_async import AsyncElasticsearch
from es_helpers import *
from pprint import pprint

logger = logger(__name__, 'info')
logger.info('file("%s").class("%s") Initialized.' % (__file__, __name__))

q = Queue()
op_dict = {"index": {"_index": 'dbpedia', "_type": 'dbpedia', "_id": ''}}

es = ElasticsearchHelper(port='9200', loglevel='info')
es.create_index(delete_first=True)

COMMIT_SIZE = 100
PATH = './dbpedia_index/current/nl/'  # '/opt/home/wfa011/code/create_dbpedia_index/3.0/current/nl/'
PATH = '/home/aloha/nobackup/dbp/'

disambiguations = Disambiguations()
disambiguations.PATH = PATH
labels = Labels()
def initialize(self, sub_server, resolvers):
    self.queue = Queue(100)
    self.subscription_server = sub_server
    self.resolvers = resolvers
def create_new_task(self):
    print(asyncio.get_event_loop())
    self.task = UserParserJob(Queue())
async def gmail_worker(name: str, long_queue: asyncio.Queue, db_callback: Callable[[ List[Tuple[str, Generator[Dict[str, 'Table'], None, None]]], str, Optional[str] ], None], *args, **kwargs) -> None: while True: try: # job will be a query to Gmail API from Gmail interface job = await long_queue.get() except long_queue.Empty: print(f'{name} sleeping for 5') await asyncio.sleep(5) continue size = long_queue.qsize() try: results, user_uuid = await job(*args, **kwargs) except Exception as e: print(f'something went wrong with the current task: {e}') return msg_objs = ({ 'owner': user_uuid, 'message_id': node.msg_id, 'thread_id': node.thread_id, 'last_fetch': datetime.now() } for node in results) comm_nodes = ({ 'message_id': node.msg_id, 'html_body': node.html_body, 'text_body': node.plaintext_body, 'mimetypes': node.mimetypes, 'ip_address': node.ip_address, 'subject': node.subject, 'date': node.date, 'keywords': json.dumps(node.keywords), 'labels': node.labels } for node in results) # flattens all the Entity arrays nested in array of comm nodes all_entities = list( itertools.chain.from_iterable([node.entities for node in results])) entities = ({ 'email': entity.email, 'name': entity.name, 'domain': entity.domain, 'msg_id': entity.msg_id, 'poc': entity.poc.name } for entity in all_entities) # free up memory del all_entities try: executables = [('msg_objs', msg_objs), ('comm_nodes', comm_nodes), ('entities', entities)] await db_callback(executables, user_uuid) except Exception as e: print(f'Error sending results to next long_queue: {e}') break print( f"{name} has completed a task from {long_queue} with {size} remaining. Sleeping for 3 seconds... \n\n" ) await asyncio.sleep(3) return
class Inserter: exchange: Exchange create_client: Callable _cryptoxlib_client: CryptoXLibClient = None _postgres_connection: Connection = None _queue: Queue = attr.ib(factory=Queue) _queue_size: int = 20 _complete = False async def __aenter__(self, *args, **kwargs): await self._aenter(*args, **kwargs) # type: ignore return self async def __aexit__(self, *args, **kwargs): await self._cryptoxlib_client.close() await self._postgres_connection.close() async def _aenter(self): self._queue = Queue(self._queue_size) self._postgres_connection = await get_postgres_connection() self._cryptoxlib_client: CryptoXLibClient = self.create_client( api_key=config["exchanges"][self.exchange.name]["api_key"], sec_key=config["exchanges"][self.exchange.name]["security_key"], ) async def run(self) -> None: def log(message: str): logger.info( message, extra={ "kind": self.__class__.__name__, "exchange": self.exchange.name, "queue_size": self._queue_size, }, ) log("starting") try: await gather( self._gather(), self._insert(), ) except CancelledError: log("cancelled") log("inserting remaining enqueued") await self._insert() async def _insert(self) -> None: total: int = 0 duplicates: int = 0 while True: def log(message: str): logger.info( message, extra={ "kind": self.__class__.__name__, "exchange": self.exchange.name, "total": total, "duplicates": duplicates, "queue_size": self._queue_size, }, ) if self._queue.empty(): if self._complete: log("finished inserting") return await sleep(2) continue log("inserting") while not self._queue.empty(): trade: Trade = await self._queue.get() try: await self._postgres_connection.copy_records_to_table( f"{self.exchange.name}_trade", records=[as_postgres_row(trade)]) except UniqueViolationError: duplicates += 1 total += 1 async def _gather(self) -> None: raise NotImplementedError("Abstract method")
def add_listener(self) -> Queue:
    """Add a listener"""
    listener: Queue = Queue()
    self._listeners.append(listener)
    return listener
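# A small fan-out sketch (not from the original source) built on add_listener()
# above: each subscriber owns its own Queue, and broadcasting simply
# put_nowait()s the event into every registered listener queue.
def broadcast(self, event) -> None:
    for listener in self._listeners:
        listener.put_nowait(event)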
def __init__(self, ack_callback: Callable[[int], None]) -> None:
    self._ack_callback = ack_callback
    self._eof = False
    self._unacked: 'Queue[UnackedData]' = Queue()
    self._acked: 'Deque[AckedData]' = deque()
    self._acked_size = 0
class LiveChatAsync: '''asyncio(aiohttp)を利用してYouTubeのライブ配信のチャットデータを取得する。 Parameter --------- video_id : str 動画ID seektime : int (ライブチャット取得時は無視) 取得開始するアーカイブ済みチャットの経過時間(秒) マイナス値を指定した場合は、配信開始前のチャットも取得する。 processor : ChatProcessor チャットデータを加工するオブジェクト buffer : Buffer(maxsize:20[default]) チャットデータchat_componentを格納するバッファ。 maxsize : 格納できるchat_componentの個数 default値20個。1個で約5~10秒分。 interruptable : bool Ctrl+Cによる処理中断を行うかどうか。 callback : func _listen()関数から一定間隔で自動的に呼びだす関数。 done_callback : func listener終了時に呼び出すコールバック。 exception_handler : func 例外を処理する関数 direct_mode : bool Trueの場合、bufferを使わずにcallbackを呼ぶ。 Trueの場合、callbackの設定が必須 (設定していない場合IllegalFunctionCall例外を発生させる) force_replay : bool Trueの場合、ライブチャットが取得できる場合であっても 強制的にアーカイブ済みチャットを取得する。 topchat_only : bool Trueの場合、上位チャットのみ取得する。 Attributes --------- _is_alive : bool チャット取得を停止するためのフラグ ''' _setup_finished = False def __init__(self, video_id, seektime=0, processor=DefaultProcessor(), buffer=None, interruptable=True, callback=None, done_callback=None, exception_handler=None, direct_mode=False, force_replay=False, topchat_only=False): self.video_id = video_id self.seektime = seektime if isinstance(processor, tuple): self.processor = Combinator(processor) else: self.processor = processor self._buffer = buffer self._callback = callback self._done_callback = done_callback self._exception_handler = exception_handler self._direct_mode = direct_mode self._is_alive = True self._is_replay = force_replay self._parser = Parser(is_replay=self._is_replay) self._pauser = Queue() self._pauser.put_nowait(None) self._setup() self._first_fetch = True self._fetch_url = "live_chat/get_live_chat?continuation=" self._topchat_only = topchat_only if not LiveChatAsync._setup_finished: LiveChatAsync._setup_finished = True if exception_handler == None: self._set_exception_handler(self._handle_exception) else: self._set_exception_handler(exception_handler) if interruptable: signal.signal(signal.SIGINT, (lambda a, b: asyncio.create_task( LiveChatAsync.shutdown(None, signal.SIGINT, b)))) def _setup(self): #direct modeがTrueでcallback未設定の場合例外発生。 if self._direct_mode: if self._callback is None: raise IllegalFunctionCall( "When direct_mode=True, callback parameter is required.") else: #direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成 if self._buffer is None: self._buffer = Buffer(maxsize=20) #callbackが指定されている場合はcallbackを呼ぶループタスクを作成 if self._callback is None: pass else: #callbackを呼ぶループタスクの開始 loop = asyncio.get_event_loop() loop.create_task(self._callback_loop(self._callback)) #_listenループタスクの開始 loop = asyncio.get_event_loop() listen_task = loop.create_task(self._startlisten()) #add_done_callbackの登録 if self._done_callback is None: listen_task.add_done_callback(self.finish) else: listen_task.add_done_callback(self._done_callback) async def _startlisten(self): """Fetch first continuation parameter, create and start _listen loop. """ initial_continuation = liveparam.getparam(self.video_id, 3) await self._listen(initial_continuation) async def _listen(self, continuation): ''' Fetch chat data and store them into buffer, get next continuaiton parameter and loop. 
Parameter --------- continuation : str parameter for next chat data ''' try: async with aiohttp.ClientSession() as session: while (continuation and self._is_alive): continuation = await self._check_pause(continuation) contents = await self._get_contents( continuation, session, headers) metadata, chatdata = self._parser.parse(contents) timeout = metadata['timeoutMs'] / 1000 chat_component = { "video_id": self.video_id, "timeout": timeout, "chatdata": chatdata } time_mark = time.time() if self._direct_mode: await self._callback( self.processor.process([chat_component])) else: await self._buffer.put(chat_component) diff_time = timeout - (time.time() - time_mark) await asyncio.sleep(diff_time) continuation = metadata.get('continuation') except ChatParseException as e: #self.terminate() logger.debug(f"[{self.video_id}]{str(e)}") return except (TypeError, json.JSONDecodeError): #self.terminate() logger.error(f"{traceback.format_exc(limit = -1)}") return logger.debug(f"[{self.video_id}]finished fetching chat.") async def _check_pause(self, continuation): if self._pauser.empty(): '''pause''' await self._pauser.get() '''resume: prohibit from blocking by putting None into _pauser. ''' self._pauser.put_nowait(None) if not self._is_replay: continuation = liveparam.getparam(self.video_id, 3, self._topchat_only) return continuation async def _get_contents(self, continuation, session, headers): '''Get 'contents' dict from livechat json. If contents is None at first fetching, try to fetch archive chat data. Return: ------- 'contents' dict which includes metadata & chatdata. ''' livechat_json = (await self._get_livechat_json(continuation, session, headers)) contents = self._parser.get_contents(livechat_json) if self._first_fetch: if contents is None or self._is_replay: '''Try to fetch archive chat data.''' self._parser.is_replay = True self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation=" continuation = arcparam.getparam(self.video_id, self.seektime, self._topchat_only) livechat_json = (await self._get_livechat_json( continuation, session, headers)) contents = self._parser.get_contents(livechat_json) self._first_fetch = False return contents async def _get_livechat_json(self, continuation, session, headers): ''' Get json which includes chat data. ''' continuation = urllib.parse.quote(continuation) livechat_json = None status_code = 0 url = ( f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1") for _ in range(MAX_RETRY + 1): async with session.get(url, headers=headers) as resp: try: text = await resp.text() livechat_json = json.loads(text) break except (ClientConnectorError, json.JSONDecodeError): await asyncio.sleep(1) continue else: logger.error(f"[{self.video_id}]" f"Exceeded retry count. 
status_code={status_code}") return None return livechat_json async def _callback_loop(self, callback): """ コンストラクタでcallbackを指定している場合、バックグラウンドで callbackに指定された関数に一定間隔でチャットデータを投げる。 Parameter --------- callback : func 加工済みのチャットデータを渡す先の関数。 """ while self.is_alive(): items = await self._buffer.get() data = self.processor.process(items) await callback(data) async def get(self): """ bufferからデータを取り出し、processorに投げ、 加工済みのチャットデータを返す。 Returns : Processorによって加工されたチャットデータ """ if self._callback is None: items = await self._buffer.get() return self.processor.process(items) raise IllegalFunctionCall("既にcallbackを登録済みのため、get()は実行できません。") def is_replay(self): return self._is_replay def pause(self): if self._callback is None: return if not self._pauser.empty(): self._pauser.get_nowait() def resume(self): if self._callback is None: return if self._pauser.empty(): self._pauser.put_nowait(None) def is_alive(self): return self._is_alive def finish(self, sender): '''Listener終了時のコールバック''' try: self.terminate() except CancelledError: logger.debug(f'[{self.video_id}]cancelled:{sender}') def terminate(self): ''' Listenerを終了する。 ''' self._is_alive = False if self._direct_mode == False: #bufferにダミーオブジェクトを入れてis_alive()を判定させる self._buffer.put_nowait({'chatdata': '', 'timeout': 0}) logger.info(f'[{self.video_id}]finished.') @classmethod def _set_exception_handler(cls, handler): loop = asyncio.get_event_loop() loop.set_exception_handler(handler) @classmethod def _handle_exception(cls, loop, context): if not isinstance(context["exception"], CancelledError): logger.error(f"Caught exception: {context}") loop = asyncio.get_event_loop() loop.create_task(cls.shutdown(None, None, None)) @classmethod async def shutdown(cls, event, sig=None, handler=None): logger.debug("shutdown...") tasks = [ t for t in asyncio.all_tasks() if t is not asyncio.current_task() ] [task.cancel() for task in tasks] logger.debug(f"complete remaining tasks...") await asyncio.gather(*tasks, return_exceptions=True) loop = asyncio.get_event_loop() loop.stop()
class MPV:
    def __init__(
            self,
            media="",
            socket=None,
            mpv_path="/usr/bin/mpv",
            mpv_args=["--no-audio-display"],
            log_callback=None,
            log_level="error"):
        """
        Create an MPV instance. If you specify a socket, this will not start
        a new mpv process and will instead connect to the one listening on
        that socket. Otherwise a new mpv process is started from *mpv_path*
        and connected to. Optionally you can specify a path or URL of a media
        file to play.
        """
        self.properties = set()
        self.event_bindings = {}
        self.property_bindings = {}
        self.key_bindings = {}
        self.unbound_key_callback = None
        self.observer_id = 1
        self.keybind_id = 1
        self.loop = get_event_loop()
        self.media = media
        self.mpv_args = mpv_args
        self.socket = socket
        self.mpv_path = mpv_path
        self.log_callback = log_callback
        self.log_level = log_level
        self.reader, self.writer = None, None
        self.process = None
        self.event_queue = Queue()
        self.wait_queue = None
        self.command_responses = {}
        self.tasks = []
        self.rid = 0
        self.command_lock = Lock()

    def _cleanup(self):
        try:
            unlink("/tmp/mpv-socket.sock")
        except OSError:
            pass

    async def _start_mpv(self):
        self._cleanup()
        self.process = await create_subprocess_exec(
            self.mpv_path,
            "--input-ipc-server=/tmp/mpv-socket.sock",
            self.media,
            *self.mpv_args,
            stdout=DEVNULL,
            stderr=DEVNULL
        )
        self.socket = "/tmp/mpv-socket.sock"

    async def _process_events(self):
        while True:
            data = await self.reader.readline()
            try:
                json_data = json.loads(data)
            except json.decoder.JSONDecodeError:
                break
            logger.debug(json_data)
            if "request_id" in json_data and json_data["request_id"] in self.command_responses:
                self.command_responses[json_data["request_id"]].set_response(json_data)
            else:
                await self.event_queue.put(json_data)
                if self.wait_queue:
                    await self.wait_queue.put(json_data)

    async def _event_dispatcher(self):
        while True:
            data = await self.event_queue.get()
            if data["event"] in self.event_bindings:
                params = {k: v for k, v in data.items() if k != "event"}
                for coro in self.event_bindings[data["event"]]:
                    self.loop.create_task(coro(**params))

    async def _stop(self):
        for task in self.tasks:
            task.cancel()
        self.writer.close()
        await self.writer.wait_closed()
        self._cleanup()

    async def _wait_destroy(self):
        await self.wait_complete()
        await self._stop()

    async def send(self, arguments):
        """
        Coroutine. Sends a command, waits and returns the response.
        """
        async with self.command_lock:
            self.rid += 1
            self.command_responses[self.rid] = ResponseEvent()
            data = json.dumps({
                "command": arguments,
                "request_id": self.rid
            }) + "\n"
            logger.debug(f"command: {data}")
            data = data.encode("utf-8")
            self.writer.write(data)
            await self.writer.drain()
            response = await self.command_responses[self.rid].wait()
            del self.command_responses[self.rid]
            return response

    async def command(self, *args):
        result = await self.send(args)
        if result.get("error") != "success":
            raise MPVError("mpv command returned error: %s" % (result,))
        return result.get("data")

    def listen_for(self, event, func):
        """
        Add a coroutine to be used as a callback for the event specified in
        the *event* argument.
        """
        if event in self.event_bindings:
            self.event_bindings[event].append(func)
        else:
            self.event_bindings[event] = [func]

    async def get_events(self, event=None, timeout=None):
        """
        Async generator. This will yield events as dictionaries.
        """
        self.wait_queue = Queue()
        while True:
            if timeout:
                try:
                    data = await asyncio.wait_for(self.wait_queue.get(), timeout)
                except asyncio.TimeoutError:
                    return
            else:
                data = await self.wait_queue.get()
            if not event or data["event"] == event:
                yield data
        self.wait_queue = None

    async def start(self):
        """
        Coroutine. Start this MPV instance.
        """
        if not self.socket:
            await self._start_mpv()
        for _ in range(100):
            try:
                self.reader, self.writer = await open_unix_connection(self.socket)
                self.tasks = [
                    self.loop.create_task(self._process_events()),
                    self.loop.create_task(self._event_dispatcher())
                ]
                break
            except FileNotFoundError:
                await sleep(0.1)
        self.properties = set(
            p.replace("-", "_")
            for p in await self.command("get_property", "property-list")
        )
        self.listen_for("property-change", self.on_property_change)
        self.listen_for("client-message", self.on_client_message)
        if self.log_callback is not None and self.log_level is not None:
            await self.command("request_log_messages", self.log_level)
            self.listen_for("log-message", self.on_log_message)
        if self.process:
            self.loop.create_task(self._wait_destroy())

    async def on_log_message(self, level, prefix, text):
        await self.log_callback(level, prefix, text.strip())

    async def on_client_message(self, args):
        if len(args) == 2 and args[0] == "custom-bind":
            self.loop.create_task(self.key_bindings[args[1]]())
        elif (
                self.unbound_key_callback
                and len(args) == 5
                and args[0] == "key-binding"
                and args[1] == "unmapped-keypress"
                and args[2][0] == "d"):
            self.loop.create_task(self.unbound_key_callback(*args[2:]))

    async def on_property_change(self, id, name, data):
        if id in self.property_bindings:
            propname, callback = self.property_bindings[id]
            self.loop.create_task(callback(name, data))

    def bind_property_observer(self, name, callback):
        """
        Bind a callback to an MPV property change.

        *name* is the property name.
        *callback(name, data)* is the function to call.

        Returns a unique observer ID needed to destroy the observer.
        """
        observer_id = self.observer_id
        self.observer_id += 1
        self.property_bindings[observer_id] = name, callback
        self.loop.create_task(self.command("observe_property", observer_id, name))
        return observer_id

    def unbind_property_observer(self, name_or_id):
        if isinstance(name_or_id, int) and name_or_id in self.property_bindings:
            del self.property_bindings[name_or_id]
        elif isinstance(name_or_id, str):
            self.property_bindings = {
                id: (propname, callback)
                for id, (propname, callback) in self.property_bindings.items()
                if propname != name_or_id
            }

    async def bind_key_press(self, name, callback):
        """
        Bind a callback to an MPV keypress event.

        *name* is the key symbol.
        *callback()* is the function to call.
        """
        keybind_id = self.keybind_id
        self.keybind_id += 1
        bind_name = "bind{0}".format(keybind_id)
        self.key_bindings[bind_name] = callback
        try:
            await self.command("keybind", name, "script-message custom-bind {0}".format(bind_name))
        except MPVError:
            await self.command(
                "define_section",
                bind_name,
                "{0} script-message custom-bind {1}".format(name, bind_name)
            )
            await self.command("enable_section", bind_name)

    async def register_unbound_key_callback(self, callback):
        self.unbound_key_callback = callback
        await self.command("keybind", "UNMAPPED", "script-binding unmapped-keypress")

    def on_key_press(self, name):
        """
        Decorator to bind a callback to an MPV keypress event.

        @on_key_press(key_name)
        def my_callback():
            pass
        """
        def wrapper(func):
            # bind_key_press is a coroutine, so it must be scheduled on the loop
            self.loop.create_task(self.bind_key_press(name, func))
            return func
        return wrapper

    def on_event(self, name):
        """
        Decorator to bind a callback to an MPV event.

        @on_event(name)
        def my_callback(event_data):
            pass
        """
        def wrapper(func):
            self.listen_for(name, func)
            return func
        return wrapper

    async def wait_complete(self):
        """
        Coroutine. Wait for the player to exit. Works when the MPV instance
        is managed by the library.
        """
        await self.process.wait()

    async def stop(self):
        """
        Coroutine. Stop this MPV instance.
        """
        try:
            await self.send(["quit"])
            await self.process.wait()
        except Exception:
            pass

    def __del__(self):
        self.loop.create_task(self.stop())
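A minimal usage sketch of the MPV wrapper above, using only methods defined in the class (start, on_event, bind_property_observer, command, wait_complete). The media path is a placeholder and error handling is omitted:

import asyncio

async def main():
    player = MPV(media="/path/to/video.mkv")   # placeholder media path
    await player.start()

    # React to mpv's end-file event via the decorator API.
    @player.on_event("end-file")
    async def on_end(**event):
        print("playback finished:", event)

    # Observe the standard mpv "pause" property.
    async def on_pause_change(name, value):
        print(name, "changed to", value)
    player.bind_property_observer("pause", on_pause_change)

    await player.command("set_property", "volume", 50)
    await player.wait_complete()               # returns when mpv exits

asyncio.get_event_loop().run_until_complete(main())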
class AsyncioSubscriptionManager(SubscriptionManager):
    def __init__(self, pubnub_instance):
        subscription_manager = self

        self._message_worker = None
        self._message_queue = Queue()
        self._subscription_lock = Semaphore(1)
        self._subscribe_loop_task = None
        # Not present in the original excerpt, but without it the first call
        # to _stop_subscribe_loop() would raise AttributeError.
        self._subscribe_request_task = None
        self._heartbeat_periodic_callback = None
        self._reconnection_manager = AsyncioReconnectionManager(pubnub_instance)

        super(AsyncioSubscriptionManager, self).__init__(pubnub_instance)
        self._start_worker()

        class AsyncioReconnectionCallback(ReconnectionCallback):
            def on_reconnect(self):
                subscription_manager.reconnect()

                pn_status = PNStatus()
                pn_status.category = PNStatusCategory.PNReconnectedCategory
                pn_status.error = False

                subscription_manager._subscription_status_announced = True
                subscription_manager._listener_manager.announce_status(pn_status)

        self._reconnection_listener = AsyncioReconnectionCallback()
        self._reconnection_manager.set_reconnection_listener(self._reconnection_listener)

    def _set_consumer_event(self):
        if not self._message_worker.cancelled():
            self._message_worker.cancel()

    def _message_queue_put(self, message):
        self._message_queue.put_nowait(message)

    def _start_worker(self):
        consumer = AsyncioSubscribeMessageWorker(
            self._pubnub, self._listener_manager, self._message_queue, None)
        self._message_worker = asyncio.ensure_future(
            consumer.run(), loop=self._pubnub.event_loop)

    def reconnect(self):
        # TODO: method is synchronized in Java
        self._should_stop = False
        self._subscribe_loop_task = asyncio.ensure_future(self._start_subscribe_loop())
        self._register_heartbeat_timer()

    def disconnect(self):
        # TODO: method is synchronized in Java
        self._should_stop = True
        self._stop_heartbeat_timer()
        self._stop_subscribe_loop()

    def stop(self):
        super(AsyncioSubscriptionManager, self).stop()
        self._reconnection_manager.stop_polling()
        if self._subscribe_loop_task and not self._subscribe_loop_task.cancelled():
            self._subscribe_loop_task.cancel()

    async def _start_subscribe_loop(self):
        self._stop_subscribe_loop()

        await self._subscription_lock.acquire()

        combined_channels = self._subscription_state.prepare_channel_list(True)
        combined_groups = self._subscription_state.prepare_channel_group_list(True)

        if len(combined_channels) == 0 and len(combined_groups) == 0:
            self._subscription_lock.release()
            return

        self._subscribe_request_task = asyncio.ensure_future(
            Subscribe(self._pubnub)
            .channels(combined_channels)
            .channel_groups(combined_groups)
            .timetoken(self._timetoken)
            .region(self._region)
            .filter_expression(self._pubnub.config.filter_expression)
            .future())

        e = await self._subscribe_request_task

        if self._subscribe_request_task.cancelled():
            self._subscription_lock.release()
            return

        if e.is_error():
            if e.status and e.status.category == PNStatusCategory.PNCancelledCategory:
                self._subscription_lock.release()
                return

            if e.status and e.status.category == PNStatusCategory.PNTimeoutCategory:
                asyncio.ensure_future(self._start_subscribe_loop())
                self._subscription_lock.release()
                return

            logger.error("Exception in subscribe loop: %s" % str(e))

            if e.status and e.status.category == PNStatusCategory.PNAccessDeniedCategory:
                e.status.operation = PNOperationType.PNUnsubscribeOperation

            # TODO: raise error
            self._listener_manager.announce_status(e.status)

            self._reconnection_manager.start_polling()
            self._subscription_lock.release()
            self.disconnect()
            return
        else:
            self._handle_endpoint_call(e.result, e.status)
            self._subscription_lock.release()
            self._subscribe_loop_task = asyncio.ensure_future(self._start_subscribe_loop())

        self._subscription_lock.release()

    def _stop_subscribe_loop(self):
        if self._subscribe_request_task is not None and not self._subscribe_request_task.cancelled():
            self._subscribe_request_task.cancel()

    def _stop_heartbeat_timer(self):
        if self._heartbeat_periodic_callback is not None:
            self._heartbeat_periodic_callback.stop()

    def _register_heartbeat_timer(self):
        super(AsyncioSubscriptionManager, self)._register_heartbeat_timer()

        self._heartbeat_periodic_callback = AsyncioPeriodicCallback(
            self._perform_heartbeat_loop,
            self._pubnub.config.heartbeat_interval * 1000,
            self._pubnub.event_loop)
        if not self._should_stop:
            self._heartbeat_periodic_callback.start()

    async def _perform_heartbeat_loop(self):
        if self._heartbeat_call is not None:
            # TODO: cancel call
            pass

        cancellation_event = Event()
        state_payload = self._subscription_state.state_payload()
        presence_channels = self._subscription_state.prepare_channel_list(False)
        presence_groups = self._subscription_state.prepare_channel_group_list(False)

        if len(presence_channels) == 0 and len(presence_groups) == 0:
            return

        try:
            heartbeat_call = (
                Heartbeat(self._pubnub)
                .channels(presence_channels)
                .channel_groups(presence_groups)
                .state(state_payload)
                .cancellation_event(cancellation_event)
                .future())

            envelope = await heartbeat_call

            heartbeat_verbosity = self._pubnub.config.heartbeat_notification_options
            if envelope.status.is_error:
                if heartbeat_verbosity in (PNHeartbeatNotificationOptions.ALL,
                                           PNHeartbeatNotificationOptions.FAILURES):
                    self._listener_manager.announce_status(envelope.status)
            else:
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL:
                    self._listener_manager.announce_status(envelope.status)
        except PubNubAsyncioException:
            pass
            # TODO: check correctness
            # if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
            #     self._start_subscribe_loop()
            # else:
            #     self._listener_manager.announce_status(e.status)
        finally:
            cancellation_event.set()

    def _send_leave(self, unsubscribe_operation):
        asyncio.ensure_future(self._send_leave_helper(unsubscribe_operation))

    async def _send_leave_helper(self, unsubscribe_operation):
        envelope = await Leave(self._pubnub) \
            .channels(unsubscribe_operation.channels) \
            .channel_groups(unsubscribe_operation.channel_groups) \
            .future()

        self._listener_manager.announce_status(envelope.status)
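The heartbeat timer above depends on an AsyncioPeriodicCallback helper that is referenced but not defined in this excerpt. The following is not its actual implementation; it is a minimal sketch of such a helper, assuming it only needs start()/stop(), a coroutine function, and an interval given in milliseconds:

import asyncio

class AsyncioPeriodicCallback:
    """Sketch only: repeatedly schedules a coroutine on the given event loop."""

    def __init__(self, coro_func, interval_ms, loop):
        self._coro_func = coro_func          # coroutine function invoked every tick
        self._interval = interval_ms / 1000  # interval is given in milliseconds
        self._loop = loop
        self._task = None

    def start(self):
        self._task = self._loop.create_task(self._run())

    def stop(self):
        if self._task is not None:
            self._task.cancel()
            self._task = None

    async def _run(self):
        while True:
            await asyncio.sleep(self._interval)
            await self._coro_func()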
    def __init__(self, client, uri="/"):
        self.client = client
        self.uri = uri
        self.queue = Queue()
    def __init__(self, maxsize=50):
        self.protocol: Consumer[Message] = bytes_protocol()
        self.queue = Queue(maxsize=maxsize)
        self._queue_is_full = False
    def __init__(self, request):
        self.request = request
        self.queue = Queue()