async def session():
    global _session
    if _session is None:
        _session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=3),
            raise_for_status=True)
    return _session
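A minimal usage sketch for the lazy shared-session helper above, assuming `_session` is a module-level global initialised to `None` and that the caller closes the session at shutdown; the coroutine name `fetch_status` and the URL are placeholders.

# Sketch only: assumes `_session = None` at module level and aiohttp imported.
import asyncio

async def fetch_status(url):
    s = await session()                # lazily creates the shared ClientSession
    async with s.get(url) as resp:     # raise_for_status=True raises on 4xx/5xx
        return resp.status

async def main():
    try:
        print(await fetch_status("https://example.com"))
    finally:
        if _session is not None:
            await _session.close()     # close the shared session on shutdown

asyncio.run(main())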
async def loop(self):
    """
    The main event loop that continues endlessly until the shutdown
    flag is activated
    """
    # Create a session
    self.session = aiohttp.ClientSession(
        headers={
            "User-Agent": "Siliqua/{} (Matoking@Github)".format(
                SILIQUA_VERSION
            )
        },
        timeout=aiohttp.ClientTimeout(total=self.NETWORK_TIMEOUT_SECONDS)
    )

    logger.info("Starting RPC update loop")

    failed = False
    error = None

    while not self.shutdown_flag.is_set():
        try:
            await self.check_node_version()
            await self.update_active_difficulty()
            await self.update_broadcast_blocks()
            await self.update_new_blocks()
            await self.update_pocketable_blocks()

            # Check if all accounts are synced
            self.connection_status.sync_complete = not any([
                not status.sync_complete
                for status in self.account_sync_statuses.values()
            ])
            self.connection_status.completed_rounds += 1
        except (aiohttp.ClientError, asyncio.TimeoutError):
            # In case of an error, sleep for a bit and then try
            # again. This could happen if the node is just starting up
            await asyncio.sleep(self.NETWORK_ERROR_WAIT_SECONDS)
            continue
        except (InvalidBlock, InvalidSignature,
                UnsupportedProtocolVersion) as exc:
            # If invalid blocks are returned, abort and shutdown the
            # network plugin
            failed = True
            error = exc
            self.shutdown_flag.set()
            break
        except Exception as exc:
            logger.error(
                "Unexpected error during RPC network update: %s %s",
                str(exc), traceback.format_exc()
            )
            await asyncio.sleep(self.NETWORK_ERROR_WAIT_SECONDS)
            continue

        # Sleep for a small moment between updates
        await asyncio.sleep(self.NETWORK_LOOP_WAIT_SECONDS)

    logger.info("Stopping RPC update loop")

    # Close the session on shutdown
    await self.session.close()

    if failed:
        logger.error(
            "Network server aborted due to a fatal error in RPC "
            "processor: %s", error
        )
        self.connection_status.abort(error)
import aiohttp
import pandera as pa
from pandera.typing import Series, Category, String, Int, DateTime, DataFrame
from pydantic import BaseModel, validator, Field, datetime_parse
from typing import Union, List, Optional
from datetime import datetime, date
from json import JSONDecodeError

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/70.0.3538.77 Safari/537.36",
}

data = []
data_error = []
timeout = aiohttp.ClientTimeout(total=2400, connect=900)
urls = [
    f"https://covid19.ddc.moph.go.th/api/Cases/round-3-line-lists?page={i}"
    for i in range(1, 438)
]


class CovidPatientDataframe(pa.SchemaModel):
    txn_date: Series[DateTime] = pa.Field(ge=datetime(2020, 1, 1), coerce=True)
    gender: Series[Category]
    age_number: Series[Int] = pa.Field(coerce=True)
    age_range: Series[String]
    job: Series[String]
    risk: Series[String]
    patient_type: Series[Category]
    province: Series[String]
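A hedged sketch of how one of these pages could be fetched with the timeout and headers above and validated against the schema; the response key `"data"` and the coroutine name `fetch_page` are assumptions, not part of the original module.

# Sketch only: assumes the API returns {"data": [...]} and that pandas is installed.
import asyncio
import pandas as pd

async def fetch_page(url: str) -> pd.DataFrame:
    async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
        async with session.get(url) as resp:
            payload = await resp.json(content_type=None)
    df = pd.DataFrame(payload.get("data", []))
    # Raises pandera.errors.SchemaError if a column fails validation
    return CovidPatientDataframe.validate(df)

# Example: validate the first page
# asyncio.run(fetch_page(urls[0]))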
async def process_source(filename): logging.info(f"Processing {filename}") headers = { "User-Agent": "Mozilla/5.0 (compatible; MSIE 6.0; ELI WMS sync )" } timeout = aiohttp.ClientTimeout(total=10) conn = aiohttp.TCPConnector(limit_per_host=2) async with ClientSession(headers=headers, timeout=timeout, connector=conn) as session: out_image = os.path.join( outdir, os.path.basename(filename).replace(".geojson", ".png")) if os.path.exists(out_image): return async with aiofiles.open(filename, mode="r", encoding="utf-8") as f: contents = await f.read() source = json.loads(contents) # Skip non tms layers if not source["properties"]["type"] in {"tms", "wms"}: return if "geometry" in source and source["geometry"] is not None: geom = shape(source["geometry"]) centroid = geom.representative_point() else: centroid = Point(0, 0) async def test_zoom(zoom): tile = mercantile.tile(centroid.x, centroid.y, zoom) if source["properties"]["type"] == "tms": url = await get_tms_image(tile, source, session) elif source["properties"]["type"] == "wms": url = await get_wms_image(tile, source, session) if url is None: return None, None, None try: status, img = await get_image(session, url) if status == ImageStatus.SUCCESS: image_hash = imagehash.average_hash(img) pal_image = Image.new("P", (1, 1)) pal_image.putpalette((0, 0, 0, 0, 255, 0, 255, 0, 0, 255, 255, 0) + (0, 0, 0) * 252) img_comp = img.convert("RGB").quantize(palette=pal_image) colors = img_comp.getcolors(1000) max_pixel_count = max([count for count, color in colors]) return image_hash, img, max_pixel_count except Exception as e: logging.error(e) return None, None, None image_hashes = {} max_pixel_counts = {} images = {} for zoom in range(20): image_hash, img, max_pixel_count = await test_zoom(zoom) images[zoom] = img image_hashes[zoom] = image_hash max_pixel_counts[zoom] = max_pixel_count # Getting images was not sucessful, nothing to do if len([zoom for zoom in range(20) if images[zoom] is None]) == len(range(20)): return def compare_neighbors(zoom): same_as_a_neighbor = False this_hash = image_hashes[zoom] if zoom - 1 >= 0: left_hash = image_hashes[zoom - 1] if left_hash == this_hash: same_as_a_neighbor = True if zoom + 1 < 20: right_hash = image_hashes[zoom + 1] if right_hash == this_hash: same_as_a_neighbor = True return same_as_a_neighbor def zoom_in_is_empty(zoom): if zoom + 1 < 20: if (image_hashes[zoom + 1] is None or max_count( str(image_hashes[zoom + 1]).upper().replace("F", "O")) == 16): return True return False # Find minzoom min_zoom = None for zoom in range(20): if image_hashes[zoom] is None: continue if zoom_in_is_empty(zoom): continue if max_count(str(image_hashes[zoom]).upper().replace("F", "O")) == 16: continue if not compare_neighbors(zoom): min_zoom = zoom break fig, axs = plt.subplots(2, 10, figsize=(15, 5)) for z in range(20): if z < 10: ax = axs[0][z] else: ax = axs[1][z - 10] ax.set_xlim(0, 256) ax.set_ylim(0, 256) if images[z] is not None: ax.imshow(images[z]) else: ax.text( 0.5, 0.5, "No data", horizontalalignment="center", verticalalignment="center", transform=ax.transAxes, ) ax.set_aspect("equal") # ax.tick_params(axis='both', which='both', length=0.0, width=0.0) ax.get_xaxis().set_ticks([]) ax.get_yaxis().set_ticks([]) if image_hashes[z] is None: ax.set_xlabel("") else: ax.set_xlabel( str(image_hashes[z]) + "\n" + str(max_pixel_counts[z] - 256 * 256)) ax.set_ylabel(z) title = "Zoom: {}".format(z) if z == min_zoom: title += " <== " if ("min_zoom" not in source["properties"] and z == 0) or ("min_zoom" in source["properties"] and 
source["properties"]["min_zoom"] == z): title += " ELI " ax.set_title(title) if "attribution" in source["properties"] and "text" in source[ "properties"]["attribution"]: plt.figtext(0.01, 0.01, source["properties"]["attribution"]["text"]) def update_source(selected_min_zoom, source, filename): # Check against source if we found at least one image if selected_min_zoom is not None: original_min_zoom = 0 if "min_zoom" in source["properties"]: original_min_zoom = source["properties"]["min_zoom"] # Do nothing if existing value is same as tested value if (selected_min_zoom is None or selected_min_zoom == 0) and "min_zoom" not in source["properties"]: return if not selected_min_zoom == original_min_zoom: logging.info("Update {}: {}, previously: {}".format( source["properties"]["name"], selected_min_zoom, original_min_zoom, )) if selected_min_zoom is None or selected_min_zoom == 0: source["properties"].pop("min_zoom", None) else: source["properties"]["min_zoom"] = selected_min_zoom with open(filename, "w", encoding="utf-8") as out: json.dump(source, out, indent=4, sort_keys=False, ensure_ascii=False) out.write("\n") def on_click(event): try: selected_min_zoom = int( event.inaxes.yaxis.get_label().get_text()) update_source(selected_min_zoom, source, filename) if selected_min_zoom < 10: ax = axs[0][selected_min_zoom] else: ax = axs[1][selected_min_zoom - 10] for sp in ax.spines.values(): sp.set_color("red") plt.savefig(out_image) plt.close() except Exception as e: print(str(e)) def on_key(event): selected_min_zoom = min_zoom update_source(selected_min_zoom, source, filename) if selected_min_zoom < 10: ax = axs[0][selected_min_zoom] else: ax = axs[1][selected_min_zoom - 10] for sp in ax.spines.values(): sp.set_color("red") plt.savefig(out_image) plt.close() fig.suptitle(filename) plt.tight_layout() fig.canvas.mpl_connect("button_press_event", on_click) fig.canvas.mpl_connect("key_press_event", on_key) plt.show() try: plt.close() except Exception as e: logging.warning(str(e)) return
async def shutdown_request(indexs, url, slot_path, duration, cam_timeout):
    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=cam_timeout)) as session:
        result = await asyncio.gather(*[
            shutdown(session, i, url, slot_path, duration, cam_timeout)
            for i in indexs
        ])
        # The async with block already closes the session on exit;
        # this explicit close is redundant but harmless.
        await session.close()
        return result
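A hedged driver for the helper above; the `shutdown` coroutine it fans out to is defined elsewhere in the original module, and every argument value here is a placeholder.

# Sketch: placeholder arguments; `shutdown(...)` must exist in the surrounding module.
import asyncio

results = asyncio.run(
    shutdown_request(
        indexs=[0, 1, 2],           # camera slots to address
        url="http://camera.local",  # placeholder base URL
        slot_path="/tmp/slots",     # placeholder path
        duration=5,
        cam_timeout=10,             # becomes ClientTimeout(total=10)
    )
)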
import logging

import aiohttp
from botocore.session import Session

__all__ = ["ConcurrentS3Client", "AsyncConcurrentS3Client"]

_LOGGER = logging.getLogger(__name__)

# Experiments on an EC2 instance with a 10 Gbps network connection
# show diminishing returns (and sometimes increases in total time)
# with more than 6 concurrent requests.
_DEFAULT_MAX_CONCURRENT_REQUESTS = 6

# This matches the default max attempts defined in botocore.
_DEFAULT_MAX_ATTEMPTS = 5

# By default, don't timeout.
_DEFAULT_TIMEOUT = aiohttp.ClientTimeout(total=None, connect=None,
                                         sock_connect=None, sock_read=None)

# Don't bother making concurrent requests unless the object
# exceeds this size.
_MIN_DOWNLOAD_PART_SIZE_BYTES = 5 * 1024 * 1024

# The minimum time (jitter notwithstanding) to sleep after
# the first failure. This matches the base defined in botocore
# for S3.
_BACKOFF_BASE = 0.5

# The maximum sleep time between failed requests.
_BACKOFF_CAP = 15

# The size of the buffer that we read into when computing
# MD5 checksums.
_MD5_CHUNK_SIZE_BYTES = 1024 * 1024

_XML_NAMESPACES = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"}

# These are factors of 1 MiB that are used to divide multipart object
async def __aenter__(self):
    headers = service_auth_headers(self._deploy_config, 'query')
    self._session = get_context_specific_ssl_client_session(
        timeout=aiohttp.ClientTimeout(total=60),
        headers=headers)
    return self
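The matching `__aexit__` is not part of the excerpt above; a minimal sketch of what it would typically look like for this pattern, assuming `self._session` exposes aiohttp's `close()`.

# Sketch of a matching __aexit__ (an assumption, not from the original class).
async def __aexit__(self, exc_type, exc, tb):
    if self._session is not None:
        await self._session.close()
    self._session = None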
def __init__(self, session, ui_port, ingress_port):
    self.session = session
    self.ui_port = ui_port
    self.ingress_port = ingress_port
    self.timeout = aiohttp.ClientTimeout(total=20)
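A hedged sketch of how a method on this class might combine the injected session with the per-request timeout; the method name, host, and endpoint are illustrative only.

# Illustrative method (assumed, not part of the original class).
async def get_ui_health(self, host="localhost"):
    url = f"http://{host}:{self.ui_port}/health"          # hypothetical endpoint
    async with self.session.get(url, timeout=self.timeout) as resp:
        return resp.status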
def test_https_auth(self, ClientRequestMock) -> None:
    proxy_req = ClientRequest('GET', URL('http://proxy.example.com'),
                              auth=aiohttp.helpers.BasicAuth('user', 'pass'),
                              loop=self.loop)
    ClientRequestMock.return_value = proxy_req

    proxy_resp = ClientResponse('get', URL('http://proxy.example.com'),
                                request_info=mock.Mock(),
                                writer=mock.Mock(),
                                continue100=None,
                                timer=TimerNoop(),
                                traces=[],
                                loop=self.loop,
                                session=mock.Mock())
    proxy_req.send = make_mocked_coro(proxy_resp)
    proxy_resp.start = make_mocked_coro(mock.Mock(status=200))

    async def make_conn():
        return aiohttp.TCPConnector()
    connector = self.loop.run_until_complete(make_conn())
    connector._resolve_host = make_mocked_coro([{
        'hostname': 'hostname',
        'host': '127.0.0.1',
        'port': 80,
        'family': socket.AF_INET,
        'proto': 0,
        'flags': 0
    }])

    tr, proto = mock.Mock(), mock.Mock()
    self.loop.create_connection = make_mocked_coro((tr, proto))

    self.assertIn('AUTHORIZATION', proxy_req.headers)
    self.assertNotIn('PROXY-AUTHORIZATION', proxy_req.headers)

    req = ClientRequest('GET', URL('https://www.python.org'),
                        proxy=URL('http://proxy.example.com'),
                        loop=self.loop)
    self.assertNotIn('AUTHORIZATION', req.headers)
    self.assertNotIn('PROXY-AUTHORIZATION', req.headers)
    self.loop.run_until_complete(
        connector._create_connection(req, None, aiohttp.ClientTimeout()))

    self.assertEqual(req.url.path, '/')
    self.assertNotIn('AUTHORIZATION', req.headers)
    self.assertNotIn('PROXY-AUTHORIZATION', req.headers)
    self.assertNotIn('AUTHORIZATION', proxy_req.headers)
    self.assertIn('PROXY-AUTHORIZATION', proxy_req.headers)

    connector._resolve_host.assert_called_with('proxy.example.com',
                                               80,
                                               traces=mock.ANY)

    self.loop.run_until_complete(proxy_req.close())
    proxy_resp.close()
    self.loop.run_until_complete(req.close())
import io

import aiohttp
import mock
import pytest
from six.moves import http_client

from google.resumable_media import common
from google._async_resumable_media import _helpers
from google._async_resumable_media.requests import download as download_mod
from tests.unit.requests import test_download as sync_test

EXPECTED_TIMEOUT = aiohttp.ClientTimeout(
    total=None, connect=61, sock_read=60, sock_connect=None
)


class TestDownload(object):
    @pytest.mark.asyncio
    async def test__write_to_stream_no_hash_check(self):
        stream = io.BytesIO()
        download = download_mod.Download(sync_test.EXAMPLE_URL, stream=stream)

        chunk1 = b"right now, "
        chunk2 = b"but a little later"
        response = _mock_response(chunks=[chunk1, chunk2], headers={})

        ret_val = await download._write_to_stream(response)

        assert ret_val is None
def __init__(self):
    self.domain = 'http://www.ziliaoh.com/mobi.html'
    self.timeout = aiohttp.ClientTimeout(total=600)
    self.search_dict = {}
    os.makedirs(store)
    self.local_store = os.path.join(store, 'ziliaoH.db')
async def download_if_not_exist(session, path, url, download_settings, cancellable_pool, with_extension=True, session_kwargs=None, allowed_extensions=None, forbidden_extensions=None, checksum=None, signal_handler=None, unique_key=None): if session_kwargs is None: session_kwargs = {} if allowed_extensions is None: allowed_extensions = [] if forbidden_extensions is None: forbidden_extensions = [] if download_settings.allowed_extensions is not None: allowed_extensions += download_settings.allowed_extensions if download_settings.forbidden_extensions is not None: forbidden_extensions += download_settings.forbidden_extensions if isinstance(url, str): url = URL(url) domain = url.host if os.path.isabs(path): raise ValueError("Absolutes paths are not allowed") absolute_path = os.path.join(download_settings.save_path, path) if not with_extension: guess_extension = await cache.check_extension( session, str(url), session_kwargs=session_kwargs) if guess_extension is None: logger.warning(f"Could not retrieve the extension for {url}") return absolute_path += "." + guess_extension file_name = os.path.basename(absolute_path) dir_path = os.path.dirname(absolute_path) file_extension = core.utils.get_extension(file_name) temp_file_name = core.utils.add_extension(f"{random.getrandbits(64)}", file_extension) temp_absolute_path = os.path.join(core.utils.get_temp_path(), temp_file_name) old_file_name = core.utils.insert_text_before_extension(file_name, "-old") old_absolute_path = os.path.join(dir_path, old_file_name) diff_file_name = core.utils.insert_text_before_extension( file_name, "-diff") diff_absolute_path = os.path.join(dir_path, diff_file_name) force = False if checksum is not None: force = not cache.is_checksum_same(absolute_path, checksum) elif download_settings.force_download and domain not in FORCE_DOWNLOAD_BLACKLIST: force = True if os.path.exists(absolute_path) and not force: return if os.path.exists(absolute_path): headers = session_kwargs.get("headers", {}) etag = cache.get_etag(absolute_path) if etag is not None: headers["If-None-Match"] = etag if headers: session_kwargs["headers"] = headers if os.path.exists(absolute_path): action = ACTION_REPLACE else: action = ACTION_NEW if is_extension_forbidden(extension=file_extension, forbidden_extensions=forbidden_extensions, allowed_extensions=allowed_extensions): return try: async with session.get(url, timeout=aiohttp.ClientTimeout(total=0), **session_kwargs) as response: response.raise_for_status() response_headers = response.headers if response.status == 304: logger.debug(f"File '{absolute_path}' not modified") cache.save_checksum(absolute_path, checksum) return if file_extension and file_extension.lower() in MOVIE_EXTENSIONS: logger.info(f"Starting to download {file_name}") pathlib.Path(os.path.dirname(absolute_path)).mkdir(parents=True, exist_ok=True) if action == ACTION_REPLACE: shutil.move(absolute_path, temp_absolute_path) file_hash = hashlib.md5() try: with open(absolute_path, 'wb') as f: while True: chunk = await response.content.read(8192) if not chunk: break f.write(chunk) file_hash.update(chunk) except BaseException as e: os.remove(absolute_path) logger.debug(f"Removed file {absolute_path}") if action == ACTION_REPLACE: logger.debug( f"Reverting temp file to new file: {absolute_path}") shutil.move(temp_absolute_path, absolute_path) raise e if action == ACTION_REPLACE and cache.is_own_checksum_same( absolute_path, file_hash.hexdigest()): logger.debug( f"own_checksum is same for {url}. 
Skipping processing") if "ETag" in response_headers: cache.save_etag(absolute_path, response.headers["ETag"]) elif domain not in FORCE_DOWNLOAD_BLACKLIST: logger.warning( f"url: {url} had not an etag and is not in the blacklist") cache.save_checksum(absolute_path, checksum) return if download_settings.highlight_difference and \ action == ACTION_REPLACE and \ file_extension and \ file_extension.lower() == "pdf": await _add_pdf_highlights(download_settings=download_settings, cancellable_pool=cancellable_pool, signal_handler=signal_handler, unique_key=unique_key, absolute_path=absolute_path, old_absolute_path=temp_absolute_path, out_path=diff_absolute_path) if action == ACTION_REPLACE and download_settings.keep_replaced_files: shutil.move(temp_absolute_path, old_absolute_path) cache.save_own_checksum(absolute_path, file_hash.hexdigest()) if "ETag" in response_headers: cache.save_etag(absolute_path, response.headers["ETag"]) elif domain not in FORCE_DOWNLOAD_BLACKLIST: logger.warning( f"url: {url} had not an etag and is not in the blacklist") cache.save_checksum(absolute_path, checksum) if action == ACTION_REPLACE: signal_old_path, signal_diff_path = None, None if os.path.exists(old_absolute_path ) and download_settings.keep_replaced_files: signal_old_path = old_absolute_path if os.path.exists(diff_absolute_path ) and download_settings.highlight_difference: signal_diff_path = diff_absolute_path signal_handler.replaced_file(unique_key, absolute_path, signal_old_path, signal_diff_path) elif action == ACTION_NEW: signal_handler.added_new_file(unique_key, absolute_path) if action == ACTION_REPLACE: method_msg = "Replaced" elif action == ACTION_NEW: method_msg = "Added new" else: method_msg = "Unexpected action" start = { "name": f"{method_msg} file: '{{}}'", "var": file_name, "priority": 100, "cut": "back", } end = { "name": " in '{}'", "var": os.path.dirname(absolute_path), "priority": -100, "cut": "front", } logger.info(core.utils.fit_sections_to_console(start, end, margin=1)) finally: if os.path.exists(temp_absolute_path): os.remove(temp_absolute_path)
from __future__ import annotations

import aiohttp
import asyncio
from datetime import datetime, timezone
from typing import Any, Optional, List, Dict, Literal, cast

timeout = aiohttp.ClientTimeout(total=20, connect=5)


def _convert_timestamp(stamp: str) -> datetime:
    return datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S.%f%z").astimezone(
        timezone.utc).replace(microsecond=0, tzinfo=None)


def _convert_title(text: str) -> str:
    return text.replace('_', ' ').title()


# These have been taken from the status page CSS sheet
colors: Dict[str, int] = {
    # Just color names
    "green": 0x26935C,
    "blue": 0x3498DB,
    "yellow": 0xFCCF2C,
    "orange": 0xE8740F,
    "red": 0xE74C3C,
    # Component statuses:
    "operational": 0x26935C,        # green
    "under_maintenance": 0x3498DB,  # blue
async def promise(coro):
    return asyncio.run_coroutine_threadsafe(coro, discord.loop)


async def swait(coro):
    return promise(coro).result()


if __name__ == "__main__":
    discord = DiscordClient(loop=asyncio.get_event_loop())
    server = communication.Server(SocketClient, "0.0.0.0", 5654,
                                  loop=asyncio.new_event_loop())

    timeout = aiohttp.ClientTimeout(total=2)
    SocketClient.session = aiohttp.ClientSession(timeout=timeout, loop=server.loop)

    server.loop.create_task(server.start())

    try:
        print("[DISCORD] Turning on...")
        discord.run(os.getenv("FSOL_DISCORD_TOKEN"))
    except KeyboardInterrupt:
        pass
    except Exception:
        traceback.print_exc()

    try:
        server.loop.call_soon_threadsafe(server.loop.stop)
    except:
def __init__(self, url, pk, pub_secret=None, sub_secret=None):
    timeout = aiohttp.ClientTimeout(total=0)
    super().__init__(url, timeout=timeout)
    self.pk = pk
    self.pub_secret = pub_secret and pub_secret.encode('utf-8')
    # Note: unlike pub_secret, sub_secret is encoded unconditionally,
    # so passing sub_secret=None raises AttributeError here.
    self.sub_secret = sub_secret.encode('utf-8')
def aiohttp_socket_timeout(socket_timeout_s):
    """ Return an aiohttp.ClientTimeout object with only socket timeouts set. """
    return aiohttp.ClientTimeout(total=None,
                                 connect=None,
                                 sock_connect=socket_timeout_s,
                                 sock_read=socket_timeout_s)
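A brief usage sketch for the helper above: the total and connect budgets stay unlimited while each socket connect/read gets the given limit. The URL and the 15-second value are placeholders.

# Sketch: per-socket timeouts only; URL is a placeholder.
import asyncio
import aiohttp

async def fetch(url: str) -> int:
    timeout = aiohttp_socket_timeout(socket_timeout_s=15)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(url) as resp:
            await resp.read()
            return resp.status

# asyncio.run(fetch("https://example.com"))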
async def connection_handler(self, url):
    try:
        async with aiohttp.ClientSession(
                loop=self._loop,
                timeout=aiohttp.ClientTimeout(total=10)) as session:
            full_path = url + '/' + self.origin
            self.ws = await session.ws_connect(full_path)
            envelope = EventEnvelope()
            envelope.set_to(self.INCOMING_WS_PATH).set_header(
                'type', 'open')
            self.platform.send_event(envelope)
            self.log.info("Connected to " + full_path)
            closed = False
            self.last_active = time.time()
            while self.normal:
                try:
                    msg = await self.ws.receive(timeout=1)
                except asyncio.TimeoutError:
                    if not self.normal:
                        break
                    else:
                        # idle - send keep-alive
                        now = time.time()
                        if self.is_connected() and now - self.last_active > 30:
                            self.last_active = now
                            self.send_keep_alive()
                        continue
                # receive incoming event
                self.last_active = time.time()
                if msg.type == aiohttp.WSMsgType.TEXT:
                    if self.platform.has_route(self.INCOMING_WS_PATH):
                        envelope = EventEnvelope()
                        envelope.set_to(self.INCOMING_WS_PATH).set_header(
                            'type', 'text').set_body(msg.data)
                        self.platform.send_event(envelope)
                    else:
                        break
                elif msg.type == aiohttp.WSMsgType.BINARY:
                    if self.platform.has_route(self.INCOMING_WS_PATH):
                        envelope = EventEnvelope()
                        envelope.set_to(self.INCOMING_WS_PATH).set_header(
                            'type', 'bytes').set_body(msg.data)
                        self.platform.send_event(envelope)
                    else:
                        break
                else:
                    if msg.type == aiohttp.WSMsgType.ERROR:
                        self.log.error("Unexpected connection error")
                    if msg.type == aiohttp.WSMsgType.CLOSING:
                        # closing signal received - close the connection now
                        self.log.info("Disconnected, status=" + str(self.close_code) +
                                      ", message=" + self.close_message)
                        await self.ws.close(code=self.close_code,
                                            message=bytes(self.close_message, 'utf-8'))
                        if self.platform.has_route(self.INCOMING_WS_PATH):
                            envelope = EventEnvelope()
                            envelope.set_to(self.INCOMING_WS_PATH).set_body(self.close_message)\
                                .set_header('type', 'close').set_header('status', self.close_code)
                            self.platform.send_event(envelope)
                        closed = True
                    if msg.type == aiohttp.WSMsgType.CLOSE or msg.type == aiohttp.WSMsgType.CLOSED:
                        self.close_code = 1001 if msg.data is None else msg.data
                        self.close_message = 'OK' if msg.extra is None else str(msg.extra)
                        self.log.info("Disconnected, status=" + str(self.close_code) +
                                      ", message=" + self.close_message)
                        if self.platform.has_route(self.INCOMING_WS_PATH):
                            envelope = EventEnvelope()
                            envelope.set_to(self.INCOMING_WS_PATH).set_body(self.close_message)\
                                .set_header('type', 'close').set_header('status', self.close_code)
                            self.platform.send_event(envelope)
                        closed = True
                    break
            if not closed:
                await self.ws.close(code=1000, message=b'OK')
                self.ws = None
                if self.platform.has_route(self.INCOMING_WS_PATH):
                    envelope = EventEnvelope()
                    envelope.set_to(self.INCOMING_WS_PATH).set_body('OK')\
                        .set_header('type', 'close').set_header('status', 1000)
                    self.platform.send_event(envelope)
    except aiohttp.ClientConnectorError:
        self._skip_url()
        self.log.warn("Unreachable " + url)
def test_timeout(root_uninitialized: Root) -> None:
    assert root_uninitialized.timeout == aiohttp.ClientTimeout(
        None, None, 60, 60)
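Since `ClientTimeout` is usually built with keywords, a hedged note on what the positional form in the assertion denotes: its field order is total, connect, sock_read, sock_connect.

# Equivalent keyword form of the expected timeout in the assertion above.
import aiohttp

expected = aiohttp.ClientTimeout(total=None, connect=None,
                                 sock_read=60, sock_connect=60)
assert expected == aiohttp.ClientTimeout(None, None, 60, 60)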
async def one_page_crawl(self, url): # Create a http connection to given url # Extract the URL from page content # Add extracted URL to DB for next crawl try: # Make engine name and user_id as solr collection name # Create temp collection for full recrawll , and then swap it later solr_coll_name = self.task_details.get("engine_name") ename = self.task_details.get("engine_name") dname = self.task_details.get("domain_name") user_id = self.task_details.get("user_id") solr_coll_name = user_id + "_" + solr_coll_name tmp_solr_coll_name = solr_coll_name + "_temp" solr_db_url = webr_solr_url + "/solr/" + solr_coll_name + "/" max_html_page_size = 20000000 max_file_download_size = 20000000 extract_res = None # Create connection for Solr DB solr_conn = aiohttp.TCPConnector() solr_timeout = aiohttp.ClientTimeout(sock_connect=5) self.logger.debug("Trying to Crawl> " + url) new_urls = [] resp_status = None last_modified = None content_typ = None url_modified = False error = "no" conn = aiohttp.TCPConnector() timeout = aiohttp.ClientTimeout(sock_connect=500) headers = {"User-Agent": "superman"} async with aiohttp.ClientSession(connector=conn, timeout=timeout) as session: async with session.get(url, headers=headers, allow_redirects=True, max_redirects=10) as resp: # Count the responce code try: if resp != None: resp_status = resp.status self.count_http_code(resp.status) self.visted_urls_count = self.visted_urls_count + 1 content_typ = resp.headers.get("Content-Type") last_modified = resp.headers.get("Last-Modified") #charset = resp.headers.get('charset') content_length = resp.headers.get("Content-Length") #Etag = resp.headers.get('Etag') if resp.status == 200: # Check IF there is any redirectiond if str(resp.url) != url: # Request URL and responce url not mached ( Redirection) url_modified = True # Add redirected url as new url new_urls.append(str(resp.url)) raise ValueError("REDIRECT_URL") try: content_length = int(content_length) except: content_length = None self.logger.debug("Response code:" + str(resp.status) + "> URL> " + url) application_type = content_typ.split(";")[0] self.count_application_types(application_type) #self.logger.debug(str(resp.headers)) # Check for user whitelist application extract_var = dict() extract_var.update({"url": url}) if application_type in self.WhiteListApp: if application_type == "text/html" or application_type == "application/html": # If file type is "html" then read the full payload if content_length == None or content_length <= max_html_page_size: payload = await resp.content.read( max_html_page_size) extract_var.update({ "url_extract": True, "application": "html", "payload_type": "data" }) extract_res = await self.http_response_extractor( payload, extract_var) else: #self.mdb_collect.update_one({"_id":url},{"$set":{"status":"error","error":"large page size"}}) self.logger.warning( "Content length(" + str(content_length) + ") not satisfied with maximum allowed for>" + url) else: # If file type is not an html then chunk the file and read extract_var.update({ "url_extract": False, "application": application_type, "payload_type": "file" }) self.logger.debug( "New application >" + str(application_type) + str(" >") + url) temp_fp = tempfile.TemporaryFile() #print(resp.headers.get("Content-Length")) chunk = None while chunk != b'': chunk = await resp.content.read( max_file_download_size) #async for data in response.content.iter_chunked(max_file_download_size): max_file_download_size = max_file_download_size - chunk.__len__( ) if not chunk or max_file_download_size <= 0: 
#print("Bracked>"+str(resp.headers.get("Content-Length"))) break else: temp_fp.write(chunk) temp_fp.seek(0) extract_res = await self.http_response_extractor( temp_fp, extract_var) if type(extract_res) == dict: all_href = extract_res.get( "extracted_url") extract_content = extract_res.get( "content") extract_content.update({"url": url}) extract_content.update( {"domain": dname}) #extract_content.update({"id":url}) # Add Extracted data to solr Database # Get all href in page solr_res = await self.solr_doc_add( solr_conn, solr_timeout, extract_content, solr_db_url) self.logger.info( "SOLR UPDATE STATUS:" + solr_res.get("error") + " >for URL:" + url) for href in all_href: black_list = False robot_black_list = False href = href['href'] try: # Join the URL with current url extracted_url = urllib.parse.urljoin( str(resp.url), href) extracted_url = url_normalize( extracted_url) extracted_url = urllib.parse.urldefrag( extracted_url)[0] validate = validators.url( extracted_url) if validate != True: self.logger.error( str(validate)) continue except: self.logger.exception( "url normalize failed") continue # Check if url is allowed or blocked in robots.txt for domain_patten in self.robot_disallowed: domain_patten = urllib.parse.urljoin( str(resp.url), domain_patten) if urlmatch( domain_patten, extracted_url) == True: self.logger.debug( "Url Black listed by robots.txt>" + extracted_url + " >Patten >" + domain_patten) robot_black_list = True break if robot_black_list == True: continue # Check for user blacklist and whitelist url's if len(self.BlackListUrls) > 0: # Check if given url is blacklisted for domain_patten in self.BlackListUrls: if urlmatch( domain_patten, extracted_url ) == True: self.logger.debug( "Url Black listed by user>" + extracted_url + " >Patten >" + domain_patten) black_list = True break if black_list == True: continue for domain_patten in self.WhiteListUrls: if urlmatch( domain_patten, extracted_url) == True: #print("URL Matched:"+extracted_url+", Patten:"+domain_patten) new_urls.append( extracted_url) #logger.info(extracted_url) else: self.logger.debug( "URL Not Matched with WhiteListUrls:" + extracted_url + ", Patten:" + domain_patten) else: self.crawl_message = "data extracting error" else: self.logger.debug( "Application not white listed by user>" + str(application_type) + str(" >") + url) else: self.logger.debug("Response code:" + str(resp.status) + "> URL> " + url) except ValueError as vlaue_error: pass # Make current URL as completed state #print("Completed> "+url) # If URL redirect found then insert the redirected URL if url_modified == True: self.mdb_collect.update_one({"_id": url}, { "$set": { "status": "completed", "url_type": "redirected", "redirected_url": str(resp.url) } }) else: self.mdb_collect.update_one({"_id": url}, { "$set": { "status": "completed", "Content-Type": str(content_typ), "Last-Modified": str(last_modified), "Content-Length": str(content_length), "response_status": str(resp_status) } }) self.logger.info("Url:" + url + " >" + " Having > " + str(len(new_urls)) + " Link(s)") for new_url in new_urls: # If "new_url" not in database then insert results = self.mdb_collect.update_one({"_id": new_url}, { "$setOnInsert": { "status": "pending", "version": self.crawl_version, "_id": new_url, "domain_name": dname } }, upsert=True) if results.modified_count != None: # If "new_url" in database and version not matched with current then # update the current version with pending as status self.mdb_collect.update_one( { "_id": new_url, "version": { "$ne": 
self.crawl_version } }, { "$set": { "status": "pending", "version": self.crawl_version } }) except Exception: self.crawl_message = "Error found" self.craw_fin = True self.logger.exception("one_page_crawl") finally: self.free_workers = self.free_workers + 1
async def scrape_siaogang_kaohsiung() -> ScrapedData:
    timeout = aiohttp.ClientTimeout(total=5)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(URL) as r:
            return parse_siaogang_kaohsiung(await r.text())
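A hedged calling sketch; `URL` and `parse_siaogang_kaohsiung` come from the surrounding module, and treating a blown 5-second budget as "no data" is an assumption about how a caller might react.

# Sketch: run the scraper and treat an exceeded total=5 budget as "no data".
import asyncio

def scrape_or_none():
    try:
        return asyncio.run(scrape_siaogang_kaohsiung())
    except asyncio.TimeoutError:
        return None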
from .utils import remove_data
from .validate import SCHEMA_ADDON_SNAPSHOT

_LOGGER: logging.Logger = logging.getLogger(__name__)

RE_WEBUI = re.compile(
    r"^(?:(?P<s_prefix>https?)|\[PROTO:(?P<t_proto>\w+)\])"
    r":\/\/\[HOST\]:\[PORT:(?P<t_port>\d+)\](?P<s_suffix>.*)$")

RE_WATCHDOG = re.compile(
    r"^(?:(?P<s_prefix>https?|tcp)|\[PROTO:(?P<t_proto>\w+)\])"
    r":\/\/\[HOST\]:\[PORT:(?P<t_port>\d+)\](?P<s_suffix>.*)$")

RE_OLD_AUDIO = re.compile(r"\d+,\d+")

WATCHDOG_TIMEOUT = aiohttp.ClientTimeout(total=10)


class Addon(AddonModel):
    """Hold data for add-on inside Supervisor."""

    def __init__(self, coresys: CoreSys, slug: str):
        """Initialize data holder."""
        super().__init__(coresys, slug)
        self.instance: DockerAddon = DockerAddon(coresys, self)
        self.state: AddonState = AddonState.UNKNOWN

    def __repr__(self) -> str:
        """Return internal representation."""
        return f"<Addon: {self.slug}>"

    @property
def get_new_session() -> aiohttp.ClientSession:
    return aiohttp.ClientSession(json_serialize=ujson.dumps,
                                 timeout=aiohttp.ClientTimeout(total=300))
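A hedged usage sketch for the factory above; creating the session inside a running coroutine and closing it explicitly avoids the "unclosed client session" warning. The URL is a placeholder.

# Sketch: the caller owns the session returned by the factory.
import asyncio

async def main():
    session = get_new_session()
    try:
        async with session.get("https://example.com") as resp:  # placeholder URL
            print(resp.status)
    finally:
        await session.close()

asyncio.run(main())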
async def capture_request(indexs, url, slot_path, slot_reserved, mask, cam_timeout, threshold):
    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=cam_timeout)) as session:
        result = await asyncio.gather(*[
            capture(session, i, url, slot_path, slot_reserved, mask, cam_timeout, threshold)
            for i in indexs
        ])
        await session.close()
        return result
def __init__(self):
    self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(
        total=4))
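Constructing a `ClientSession` in a synchronous `__init__` works but aiohttp expects a running event loop at creation time; a hedged alternative sketch using an async factory, where the class name and attribute are placeholders rather than the original code.

# Sketch of an async-factory alternative (class name and attribute are assumptions).
import aiohttp

class Client:
    def __init__(self, session: aiohttp.ClientSession):
        self.session = session

    @classmethod
    async def create(cls) -> "Client":
        # Created while a loop is running, so no "no running event loop" warning
        session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=4))
        return cls(session)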
async def async_main(): stdout_handler = logging.StreamHandler(sys.stdout) for logger_name in [ 'aiohttp.server', 'aiohttp.web', 'aiohttp.access', 'proxy' ]: logger = logging.getLogger(logger_name) logger.setLevel(logging.INFO) logger.addHandler(stdout_handler) env = normalise_environment(os.environ) port = int(env['PROXY_PORT']) admin_root = env['UPSTREAM_ROOT'] hawk_senders = env['HAWK_SENDERS'] sso_base_url = env['AUTHBROKER_URL'] sso_client_id = env['AUTHBROKER_CLIENT_ID'] sso_client_secret = env['AUTHBROKER_CLIENT_SECRET'] redis_url = env['REDIS_URL'] root_domain = env['APPLICATION_ROOT_DOMAIN'] basic_auth_user = env['METRICS_SERVICE_DISCOVERY_BASIC_AUTH_USER'] basic_auth_password = env['METRICS_SERVICE_DISCOVERY_BASIC_AUTH_PASSWORD'] x_forwarded_for_trusted_hops = int(env['X_FORWARDED_FOR_TRUSTED_HOPS']) application_ip_whitelist = env['APPLICATION_IP_WHITELIST'] root_domain_no_port, _, root_port_str = root_domain.partition(':') try: root_port = int(root_port_str) except ValueError: root_port = None csp_common = "object-src 'none';" if root_domain not in ['dataworkspace.test:8000']: csp_common += 'upgrade-insecure-requests;' # A spawning application on <my-application>.<root_domain> shows the admin-styled site, # fetching assets from <root_domain>, but also makes requests to the current domain csp_application_spawning = csp_common + ( f'default-src {root_domain};' f'base-uri {root_domain};' f'font-src {root_domain} data:;' f'form-action {root_domain} *.{root_domain};' f'frame-ancestors {root_domain};' f'img-src {root_domain} data: https://www.googletagmanager.com https://www.google-analytics.com;' f"script-src 'unsafe-inline' {root_domain} https://www.googletagmanager.com https://www.google-analytics.com;" f"style-src 'unsafe-inline' {root_domain};" f"connect-src {root_domain} 'self';") # A running application should only connect to self: this is where we have the most # concern because we run the least-trusted code def csp_application_running(host): return csp_common + ( "default-src 'self';" "base-uri 'self';" # Safari does not have a 'self' for WebSockets f"connect-src 'self' wss://{host};" "font-src 'self' data:;" "form-action 'self';" "frame-ancestors 'self';" "img-src 'self' data:;" # Both JupyterLab and RStudio need `unsafe-eval` "script-src 'unsafe-inline' 'unsafe-eval' 'self';" "style-src 'unsafe-inline' 'self';") redis_pool = await aioredis.create_redis_pool(redis_url) default_http_timeout = aiohttp.ClientTimeout() # When spawning and tring to detect if the app is running, # we fail quickly and often so a connection check is quick spawning_http_timeout = aiohttp.ClientTimeout(sock_read=5, sock_connect=2) def get_random_context_logger(): return ContextAdapter( logger, {'context': ''.join(random.choices(CONTEXT_ALPHABET, k=8))}) def without_transfer_encoding(request_or_response): return tuple((key, value) for key, value in request_or_response.headers.items() if key.lower() != 'transfer-encoding') def admin_headers(downstream_request): return (without_transfer_encoding(downstream_request) + downstream_request['sso_profile_headers']) def application_headers(downstream_request): return without_transfer_encoding(downstream_request) + ( (('x-scheme', downstream_request.headers['x-forwarded-proto']), ) if 'x-forwarded-proto' in downstream_request.headers else ()) def is_service_discovery(request): return (request.url.path == '/api/v1/application' and request.url.host == root_domain_no_port and request.method == 'GET') def is_app_requested(request): return 
request.url.host.endswith(f'.{root_domain_no_port}') def is_requesting_credentials(request): return (request.url.host == root_domain_no_port and request.url.path == '/api/v1/aws_credentials') def is_requesting_files(request): return request.url.host == root_domain_no_port and request.url.path == '/files' def is_dataset_requested(request): return (request.url.path.startswith('/api/v1/dataset/') or request.url.path.startswith('/api/v1/reference-dataset/') and request.url.host == root_domain_no_port) def is_hawk_auth_required(request): return is_dataset_requested(request) def is_healthcheck_requested(request): return (request.url.path == '/healthcheck' and request.method == 'GET' and not is_app_requested(request)) def is_table_requested(request): return (request.url.path.startswith('/api/v1/table/') and request.url.host == root_domain_no_port and request.method == 'POST') def is_sso_auth_required(request): return (not is_healthcheck_requested(request) and not is_service_discovery(request) and not is_table_requested(request) and not is_dataset_requested(request)) def get_peer_ip(request): peer_ip = (request.headers['x-forwarded-for'].split(',') [-x_forwarded_for_trusted_hops].strip()) is_private = True try: is_private = ipaddress.ip_address(peer_ip).is_private except ValueError: is_private = False return peer_ip, is_private async def handle(downstream_request): method = downstream_request.method path = downstream_request.url.path query = downstream_request.url.query app_requested = is_app_requested(downstream_request) # Websocket connections # - tend to close unexpectedly, both from the client and app # - don't need to show anything nice to the user on error is_websocket = (downstream_request.headers.get('connection', '').lower() == 'upgrade' and downstream_request.headers.get( 'upgrade', '').lower() == 'websocket') try: return (await handle_application(is_websocket, downstream_request, method, path, query) if app_requested else await handle_admin( downstream_request, method, path, query)) except Exception as exception: logger.exception( 'Exception during %s %s %s', downstream_request.method, downstream_request.url, type(exception), ) if is_websocket: raise params = ({ 'message': exception.args[0] } if isinstance(exception, UserException) else {}) status = exception.args[1] if isinstance(exception, UserException) else 500 return await handle_http( downstream_request, 'GET', CIMultiDict(admin_headers(downstream_request)), URL(admin_root).with_path(f'/error_{status}'), params, default_http_timeout, ) async def handle_application(is_websocket, downstream_request, method, path, query): public_host, _, _ = downstream_request.url.host.partition( f'.{root_domain_no_port}') possible_public_host, _, public_host_or_port_override = public_host.rpartition( '--') try: port_override = int(public_host_or_port_override) except ValueError: port_override = None else: public_host = possible_public_host host_api_url = admin_root + '/api/v1/application/' + public_host host_html_path = '/tools/' + public_host async with client_session.request( 'GET', host_api_url, headers=CIMultiDict( admin_headers(downstream_request))) as response: host_exists = response.status == 200 application = await response.json() if response.status != 200 and response.status != 404: raise UserException('Unable to start the application', response.status) if host_exists and application['state'] not in ['SPAWNING', 'RUNNING']: if ('x-data-workspace-no-modify-application-instance' not in downstream_request.headers): async with 
client_session.request( 'DELETE', host_api_url, headers=CIMultiDict(admin_headers(downstream_request)), ) as delete_response: await delete_response.read() raise UserException('Application ' + application['state'], 500) if not host_exists: if ('x-data-workspace-no-modify-application-instance' not in downstream_request.headers): params = { key: value for key, value in downstream_request.query.items() if key == '__memory_cpu' } async with client_session.request( 'PUT', host_api_url, params=params, headers=CIMultiDict(admin_headers(downstream_request)), ) as response: host_exists = response.status == 200 application = await response.json() if params: return web.Response(status=302, headers={'location': '/'}) else: raise UserException('Application stopped while starting', 500) if response.status != 200: raise UserException('Unable to start the application', response.status) if application['state'] not in ['SPAWNING', 'RUNNING']: raise UserException( 'Attempted to start the application, but it ' + application['state'], 500, ) if not application['proxy_url']: return await handle_http( downstream_request, 'GET', CIMultiDict(admin_headers(downstream_request)), admin_root + host_html_path + '/spawning', {}, default_http_timeout, (('content-security-policy', csp_application_spawning), ), ) return (await handle_application_websocket( downstream_request, application['proxy_url'], path, query, port_override) if is_websocket else await handle_application_http_spawning( downstream_request, method, application_upstream(application['proxy_url'], path, port_override), query, host_html_path, host_api_url, ) if application['state'] == 'SPAWNING' else await handle_application_http_running( downstream_request, method, application_upstream(application['proxy_url'], path, port_override), query, host_api_url, )) async def handle_application_websocket(downstream_request, proxy_url, path, query, port_override): upstream_url = application_upstream(proxy_url, path, port_override).with_query(query) return await handle_websocket( downstream_request, CIMultiDict(application_headers(downstream_request)), upstream_url, ) def application_upstream(proxy_url, path, port_override): return (URL(proxy_url).with_path(path) if port_override is None else URL(proxy_url).with_path(path).with_port(port_override)) async def handle_application_http_spawning(downstream_request, method, upstream_url, query, host_html_path, host_api_url): host = downstream_request.headers['host'] try: logger.info('Spawning: Attempting to connect to %s', upstream_url) response = await handle_http( downstream_request, method, CIMultiDict(application_headers(downstream_request)), upstream_url, query, spawning_http_timeout, # Although the application is spawning, if the response makes it back to the client, # we know the application is running, so we return the _running_ CSP headers ( ('content-security-policy', csp_application_running(host)), ), ) except Exception: logger.info('Spawning: Failed to connect to %s', upstream_url) return await handle_http( downstream_request, 'GET', CIMultiDict(admin_headers(downstream_request)), admin_root + host_html_path + '/spawning', {}, default_http_timeout, (('content-security-policy', csp_application_spawning), ), ) else: # Once a streaming response is done, if we have not yet returned # from the handler, it looks like aiohttp can cancel the current # task. 
We set RUNNING in another task to avoid it being cancelled async def set_application_running(): async with client_session.request( 'PATCH', host_api_url, json={'state': 'RUNNING'}, headers=CIMultiDict(admin_headers(downstream_request)), timeout=default_http_timeout, ) as patch_response: await patch_response.read() asyncio.ensure_future(set_application_running()) return response async def handle_application_http_running(downstream_request, method, upstream_url, query, _): # For the time being, we don't attempt to delete if an application has failed # Since initial attempts were too sensistive, and would delete the application # when it was still running # try: # return await handle_http(downstream_request, method, headers, upstream_url, query, default_http_timeout) # except (aiohttp.client_exceptions.ClientConnectionError, asyncio.TimeoutError): # async with client_session.request('DELETE', host_api_url, headers=headers) as delete_response: # await delete_response.read() # raise host = downstream_request.headers['host'] return await handle_http( downstream_request, method, CIMultiDict(application_headers(downstream_request)), upstream_url, query, default_http_timeout, (('content-security-policy', csp_application_running(host)), ), ) async def handle_admin(downstream_request, method, path, query): upstream_url = URL(admin_root).with_path(path) return await handle_http( downstream_request, method, CIMultiDict(admin_headers(downstream_request)), upstream_url, query, default_http_timeout, ) async def handle_websocket(downstream_request, upstream_headers, upstream_url): protocol = downstream_request.headers.get('Sec-WebSocket-Protocol') protocols = (protocol, ) if protocol else () async def proxy_msg(msg, to_ws): if msg.type == aiohttp.WSMsgType.TEXT: await to_ws.send_str(msg.data) elif msg.type == aiohttp.WSMsgType.BINARY: await to_ws.send_bytes(msg.data) elif msg.type == aiohttp.WSMsgType.CLOSE: await to_ws.close() elif msg.type == aiohttp.WSMsgType.ERROR: await to_ws.close() async def upstream(): try: async with client_session.ws_connect( str(upstream_url), headers=upstream_headers, protocols=protocols) as upstream_ws: upstream_connection.set_result(upstream_ws) downstream_ws = await downstream_connection async for msg in upstream_ws: await proxy_msg(msg, downstream_ws) except BaseException as exception: if not upstream_connection.done(): upstream_connection.set_exception(exception) raise finally: await downstream_ws.close() # This is slightly convoluted, but aiohttp documents that reading # from websockets should be done in the same task as the websocket was # created, so we read from downstream in _this_ task, and create # another task to connect to and read from the upstream socket. 
We # also need to make sure we wait for each connection before sending # data to it downstream_connection = asyncio.Future() upstream_connection = asyncio.Future() upstream_task = asyncio.ensure_future(upstream()) try: upstream_ws = await upstream_connection _, _, _, with_session_cookie = downstream_request[SESSION_KEY] downstream_ws = await with_session_cookie( web.WebSocketResponse(protocols=protocols)) await downstream_ws.prepare(downstream_request) downstream_connection.set_result(downstream_ws) async for msg in downstream_ws: await proxy_msg(msg, upstream_ws) finally: upstream_task.cancel() return downstream_ws async def handle_http( downstream_request, upstream_method, upstream_headers, upstream_url, upstream_query, timeout, response_headers=tuple(), ): # Avoid aiohttp treating request as chunked unnecessarily, which works # for some upstream servers, but not all. Specifically RStudio drops # GET responses half way through if the request specified a chunked # encoding. AFAIK RStudio uses a custom webserver, so this behaviour # is not documented anywhere. # fmt: off data = \ b'' if ( 'content-length' not in upstream_headers and downstream_request.headers.get('transfer-encoding', '').lower() != 'chunked' ) else \ await downstream_request.read() if downstream_request.content.at_eof() else \ downstream_request.content # fmt: on async with client_session.request( upstream_method, str(upstream_url), params=upstream_query, headers=upstream_headers, data=data, allow_redirects=False, timeout=timeout, ) as upstream_response: _, _, _, with_session_cookie = downstream_request[SESSION_KEY] downstream_response = await with_session_cookie( web.StreamResponse( status=upstream_response.status, headers=CIMultiDict( without_transfer_encoding(upstream_response) + response_headers), )) await downstream_response.prepare(downstream_request) async for chunk in upstream_response.content.iter_any(): await downstream_response.write(chunk) return downstream_response def server_logger(): @web.middleware async def _server_logger(request, handler): request_logger = get_random_context_logger() request['logger'] = request_logger request_logger.info( 'Receiving (%s) (%s %s HTTP/%s.%s) (%s) (%s)', *((request.remote, request.method, request.path_qs) + request.version + ( request.headers.get('User-Agent', '-'), request.headers.get('X-Forwarded-For', '-'), )), ) response = await handler(request) request_logger.info('Responding (%s) (%s)', response.status, response.content_length) return response return _server_logger def authenticate_by_staff_sso_token(): me_path = 'api/v1/user/me/' @web.middleware async def _authenticate_by_staff_sso_token(request, handler): staff_sso_token_required = is_table_requested(request) request.setdefault('sso_profile_headers', ()) if not staff_sso_token_required: return await handler(request) if 'Authorization' not in request.headers: request['logger'].info( 'SSO-token unathenticated: missing authorization header') return await handle_admin(request, 'GET', '/error_403', {}) async with client_session.get( f'{sso_base_url}{me_path}', headers={ 'Authorization': request.headers['Authorization'] }, ) as me_response: me_profile = (await me_response.json() if me_response.status == 200 else None) if not me_profile: request['logger'].info( 'SSO-token unathenticated: bad authorization header') return await handle_admin(request, 'GET', '/error_403', {}) request['sso_profile_headers'] = ( ('sso-profile-email', me_profile['email']), ( 'sso-profile-related-emails', ','.join(me_profile.get('related_emails', 
[])), ), ('sso-profile-user-id', me_profile['user_id']), ('sso-profile-first-name', me_profile['first_name']), ('sso-profile-last-name', me_profile['last_name']), ) request['logger'].info( 'SSO-token authenticated: %s %s', me_profile['email'], me_profile['user_id'], ) return await handler(request) return _authenticate_by_staff_sso_token def authenticate_by_staff_sso(): auth_path = 'o/authorize/' token_path = 'o/token/' me_path = 'api/v1/user/me/' grant_type = 'authorization_code' scope = 'read write' response_type = 'code' redirect_from_sso_path = '/__redirect_from_sso' session_token_key = 'staff_sso_access_token' async def get_redirect_uri_authenticate(set_session_value, redirect_uri_final): scheme = URL(redirect_uri_final).scheme sso_state = await set_redirect_uri_final(set_session_value, redirect_uri_final) redirect_uri_callback = urllib.parse.quote( get_redirect_uri_callback(scheme), safe='') return (f'{sso_base_url}{auth_path}?' f'scope={scope}&state={sso_state}&' f'redirect_uri={redirect_uri_callback}&' f'response_type={response_type}&' f'client_id={sso_client_id}') def request_scheme(request): return request.headers.get('x-forwarded-proto', request.url.scheme) def request_url(request): return str(request.url.with_scheme(request_scheme(request))) def get_redirect_uri_callback(scheme): return str( URL.build( host=root_domain_no_port, port=root_port, scheme=scheme, path=redirect_from_sso_path, )) async def set_redirect_uri_final(set_session_value, redirect_uri_final): session_key = secrets.token_hex(32) sso_state = urllib.parse.quote( f'{session_key}_{redirect_uri_final}', safe='') await set_session_value(session_key, redirect_uri_final) return sso_state async def get_redirect_uri_final(get_session_value, sso_state): session_key, _, state_redirect_url = urllib.parse.unquote( sso_state).partition('_') return state_redirect_url, await get_session_value(session_key) async def redirection_to_sso(with_new_session_cookie, set_session_value, redirect_uri_final): return await with_new_session_cookie( web.Response( status=302, headers={ 'Location': await get_redirect_uri_authenticate(set_session_value, redirect_uri_final) }, )) @web.middleware async def _authenticate_by_sso(request, handler): sso_auth_required = is_sso_auth_required(request) if not sso_auth_required: request.setdefault('sso_profile_headers', ()) return await handler(request) get_session_value, set_session_value, with_new_session_cookie, _ = request[ SESSION_KEY] token = await get_session_value(session_token_key) if request.path != redirect_from_sso_path and token is None: return await redirection_to_sso(with_new_session_cookie, set_session_value, request_url(request)) if request.path == redirect_from_sso_path: code = request.query['code'] sso_state = request.query['state'] ( redirect_uri_final_from_url, redirect_uri_final_from_session, ) = await get_redirect_uri_final(get_session_value, sso_state) if redirect_uri_final_from_url != redirect_uri_final_from_session: # We might have been overtaken by a parallel request initiating another auth # flow, and so another session. However, because we haven't retrieved the final # URL from the session, we can't be sure that this is the same client that # initiated this flow. 
However, we can redirect back to SSO return await redirection_to_sso( with_new_session_cookie, set_session_value, redirect_uri_final_from_url, ) async with client_session.post( f'{sso_base_url}{token_path}', data={ 'grant_type': grant_type, 'code': code, 'client_id': sso_client_id, 'client_secret': sso_client_secret, 'redirect_uri': get_redirect_uri_callback(request_scheme(request)), }, ) as sso_response: sso_response_json = await sso_response.json() await set_session_value(session_token_key, sso_response_json['access_token']) return await with_new_session_cookie( web.Response( status=302, headers={'Location': redirect_uri_final_from_session}, )) # Get profile from Redis cache to avoid calling SSO on every request redis_profile_key = f'{PROFILE_CACHE_PREFIX}___{session_token_key}___{token}'.encode( 'ascii') with await redis_pool as conn: me_profile_raw = await conn.execute('GET', redis_profile_key) me_profile = json.loads(me_profile_raw) if me_profile_raw else None async def handler_with_sso_headers(): request['sso_profile_headers'] = ( ('sso-profile-email', me_profile['email']), ( 'sso-profile-related-emails', ','.join(me_profile.get('related_emails', [])), ), ('sso-profile-user-id', me_profile['user_id']), ('sso-profile-first-name', me_profile['first_name']), ('sso-profile-last-name', me_profile['last_name']), ) request['logger'].info( 'SSO-authenticated: %s %s', me_profile['email'], me_profile['user_id'], ) return await handler(request) if me_profile: return await handler_with_sso_headers() async with client_session.get( f'{sso_base_url}{me_path}', headers={'Authorization': f'Bearer {token}'}) as me_response: me_profile_full = (await me_response.json() if me_response.status == 200 else None) if not me_profile_full: return await redirection_to_sso(with_new_session_cookie, set_session_value, request_url(request)) me_profile = { 'email': me_profile_full['email'], 'related_emails': me_profile_full['related_emails'], 'user_id': me_profile_full['user_id'], 'first_name': me_profile_full['first_name'], 'last_name': me_profile_full['last_name'], } with await redis_pool as conn: await conn.execute( 'SET', redis_profile_key, json.dumps(me_profile).encode('utf-8'), 'EX', 60, ) return await handler_with_sso_headers() return _authenticate_by_sso def authenticate_by_basic_auth(): @web.middleware async def _authenticate_by_basic_auth(request, handler): basic_auth_required = is_service_discovery(request) if not basic_auth_required: return await handler(request) if 'Authorization' not in request.headers: return web.Response(status=401) basic_auth_prefix = 'Basic ' auth_value = (request.headers['Authorization'] [len(basic_auth_prefix):].strip().encode('ascii')) required_auth_value = base64.b64encode( f'{basic_auth_user}:{basic_auth_password}'.encode('ascii')) if len(auth_value) != len( required_auth_value) or not hmac.compare_digest( auth_value, required_auth_value): return web.Response(status=401) request['logger'].info('Basic-authenticated: %s', basic_auth_user) return await handler(request) return _authenticate_by_basic_auth def authenticate_by_hawk_auth(): async def lookup_credentials(sender_id): for hawk_sender in hawk_senders: if hawk_sender['id'] == sender_id: return hawk_sender async def seen_nonce(nonce, sender_id): nonce_key = f'nonce-{sender_id}-{nonce}' with await redis_pool as conn: response = await conn.execute('SET', nonce_key, '1', 'EX', 60, 'NX') seen_nonce = response != b'OK' return seen_nonce @web.middleware async def _authenticate_by_hawk_auth(request, handler): hawk_auth_required = 
is_hawk_auth_required(request) if not hawk_auth_required: return await handler(request) try: authorization_header = request.headers['Authorization'] except KeyError: request['logger'].info('Hawk missing header') return web.Response(status=401) content = await request.read() is_authenticated, error_message, creds = await authenticate_hawk_header( lookup_credentials, seen_nonce, 15, authorization_header, request.method, request.url.host, request.url.port, request.url.path, request.headers['Content-Type'], content, ) if not is_authenticated: request['logger'].info('Hawk unauthenticated: %s', error_message) return web.Response(status=401) request['logger'].info('Hawk authenticated: %s', creds['id']) return await handler(request) return _authenticate_by_hawk_auth def authenticate_by_ip_whitelist(): @web.middleware async def _authenticate_by_ip_whitelist(request, handler): ip_whitelist_required = (is_app_requested(request) or is_requesting_credentials(request) or is_requesting_files(request)) if not ip_whitelist_required: return await handler(request) peer_ip, _ = get_peer_ip(request) peer_ip_in_whitelist = any( ipaddress.IPv4Address(peer_ip) in ipaddress.IPv4Network( address_or_subnet) for address_or_subnet in application_ip_whitelist) if not peer_ip_in_whitelist: request['logger'].info('IP-whitelist unauthenticated: %s', peer_ip) return await handle_admin(request, 'GET', '/error_403', {}) request['logger'].info('IP-whitelist authenticated: %s', peer_ip) return await handler(request) return _authenticate_by_ip_whitelist async with aiohttp.ClientSession( auto_decompress=False, cookie_jar=aiohttp.DummyCookieJar()) as client_session: app = web.Application(middlewares=[ server_logger(), redis_session_middleware(redis_pool, root_domain_no_port), authenticate_by_staff_sso_token(), authenticate_by_staff_sso(), authenticate_by_basic_auth(), authenticate_by_hawk_auth(), authenticate_by_ip_whitelist(), ]) app.add_routes([ getattr(web, method)(r'/{path:.*}', handle) for method in [ 'delete', 'get', 'head', 'options', 'patch', 'post', 'put', ] ]) runner = web.AppRunner(app) await runner.setup() site = web.TCPSite(runner, '0.0.0.0', port) await site.start() await asyncio.Future()
async def _connect_and_read(self):
    """Retrieves the WS url and connects to Slack's RTM API.

    Makes an authenticated call to Slack's Web API to retrieve
    a websocket URL. Then connects to the message server and
    reads event messages as they come in.

    If 'auto_reconnect' is specified we retrieve a new url and reconnect
    any time the connection is lost unintentionally or an exception is thrown.

    Raises:
        SlackApiError: Unable to retrieve RTM URL from Slack.
        websockets.exceptions: Errors thrown by the 'websockets' library.
    """
    while not self._stopped:
        try:
            self._connection_attempts += 1
            async with aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=self.timeout)
            ) as session:
                self._session = session
                url, data = await self._retrieve_websocket_info()
                async with session.ws_connect(
                    url,
                    heartbeat=self.ping_interval,
                    ssl=self.ssl,
                    proxy=self.proxy,
                ) as websocket:
                    self._logger.debug("The Websocket connection has been opened.")
                    self._websocket = websocket
                    await self._dispatch_event(event="open", data=data)
                    await self._read_messages()
            # The websocket has been disconnected, or self._stopped is True
            if not self._stopped and not self.auto_reconnect:
                self._logger.warning(
                    "Not reconnecting the Websocket because auto_reconnect is False"
                )
                return
            # No need to wait exponentially here, since the connection was
            # established OK, but timed out, or was closed remotely
        except (
            client_err.SlackClientNotConnectedError,
            client_err.SlackApiError,
            # NOTE: We may want to catch WebSocket exceptions thrown by aiohttp too
        ) as exception:
            await self._dispatch_event(event="error", data=exception)
            error_code = (
                exception.response.get("error", None)
                if hasattr(exception, "response")
                else None
            )
            if (
                self.auto_reconnect
                and not self._stopped
                and error_code != "invalid_auth"  # "invalid_auth" is unrecoverable
            ):
                await self._wait_exponentially(exception)
                continue
            self._logger.exception(
                "The Websocket encountered an error. Closing the connection..."
            )
            self._close_websocket()
            raise
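The client above leans on a _wait_exponentially helper that is not shown here. A stripped-down sketch of the same reconnect idea, with a fresh ClientSession per attempt, a bounded exponential backoff with jitter between failed attempts, and a plain aiohttp websocket read loop, might look like the following; the echo URL, the backoff constants and run_ws itself are placeholders rather than values used by the Slack client.

import asyncio
import random

import aiohttp


async def run_ws(url='wss://echo.websocket.events', max_backoff=60):
    attempts = 0
    while True:
        try:
            # Only bound connection establishment here; a total= timeout can
            # also cap how long the intentionally long-lived socket stays open.
            async with aiohttp.ClientSession(
                    timeout=aiohttp.ClientTimeout(connect=30)) as session:
                async with session.ws_connect(url, heartbeat=30) as ws:
                    attempts = 0  # connected, so reset the backoff
                    async for msg in ws:
                        if msg.type == aiohttp.WSMsgType.TEXT:
                            print('received:', msg.data)
                        elif msg.type in (aiohttp.WSMsgType.CLOSED,
                                          aiohttp.WSMsgType.ERROR):
                            break
        except (aiohttp.ClientError, asyncio.TimeoutError):
            attempts += 1
            # Exponential backoff with jitter, capped at max_backoff seconds
            wait = min(max_backoff, (2 ** attempts) + random.random())
            await asyncio.sleep(wait)


if __name__ == '__main__':
    asyncio.run(run_ws())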
async def start_polling(self,
                        timeout=20,
                        relax=0.1,
                        limit=None,
                        reset_webhook=None,
                        fast: typing.Optional[bool] = True,
                        error_sleep: int = 5):
    """
    Start long-polling

    :param timeout:
    :param relax:
    :param limit:
    :param reset_webhook:
    :param fast:
    :return:
    """
    if self._polling:
        raise RuntimeError('Polling already started')

    log.info('Start polling.')

    # context.set_value(MODE, LONG_POLLING)
    Dispatcher.set_current(self)
    Bot.set_current(self.bot)

    if reset_webhook is None:
        await self.reset_webhook(check=False)
    if reset_webhook:
        await self.reset_webhook(check=True)

    self._polling = True
    offset = None
    try:
        current_request_timeout = self.bot.timeout
        if current_request_timeout is not sentinel and timeout is not None:
            request_timeout = aiohttp.ClientTimeout(
                total=current_request_timeout.total + timeout or 1)
        else:
            request_timeout = None

        while self._polling:
            try:
                with self.bot.request_timeout(request_timeout):
                    updates = await self.bot.get_updates(limit=limit,
                                                         offset=offset,
                                                         timeout=timeout)
            except asyncio.CancelledError:
                break
            except Exception:
                log.exception('Cause exception while getting updates.')
                await asyncio.sleep(error_sleep)
                continue

            if updates:
                log.debug(f"Received {len(updates)} updates.")
                offset = updates[-1].update_id + 1
                self.loop.create_task(
                    self._process_polling_updates(updates, fast))

            if relax:
                await asyncio.sleep(relax)
    finally:
        self._close_waiter.set_result(None)
        log.warning('Polling is stopped.')
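The timeout arithmetic above is the important detail: the HTTP ClientTimeout has to be longer than the long-poll timeout sent to the server, otherwise the client gives up while the server is still deliberately holding the request. A generic long-polling sketch of that idea, with a placeholder URL and response shape, could look like this:

import asyncio

import aiohttp


async def poll(url='https://example.com/getUpdates', poll_timeout=20):
    # The server may hold each request for up to poll_timeout seconds, so the
    # client-side timeout adds a margin on top of that.
    request_timeout = aiohttp.ClientTimeout(total=poll_timeout + 5)
    offset = None
    async with aiohttp.ClientSession(timeout=request_timeout) as session:
        while True:
            params = {'timeout': str(poll_timeout)}
            if offset is not None:
                params['offset'] = str(offset)
            try:
                async with session.get(url, params=params) as resp:
                    updates = await resp.json()
            except (aiohttp.ClientError, asyncio.TimeoutError):
                await asyncio.sleep(5)  # brief pause before retrying on network errors
                continue
            for update in updates:
                print('update:', update)
            if updates:
                offset = updates[-1]['update_id'] + 1


if __name__ == '__main__':
    asyncio.run(poll())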
async def get_song_id(song_name):
    songs = []
    no_salt_songs = []
    try:
        headers = {
            "Accept": (
                "text/html,application/xhtml+xml,application/xml;"
                "q=0.9,image/webp,image/apng,*/*;q=0.8"
            ),
            "User-Agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/70.0.3538.110 Safari/537.36"
            ),
        }
        req_json = {
            "method": "post",
            "url": "http://music.163.com/api/search/pc",
            "headers": headers,
            "data": {"s": song_name, "type": 1, "limit": 50, "offset": 0},
            "params": {},
            "timeout": 10
        }
        timeout = aiohttp.ClientTimeout(total=60)

        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(cloud_function_url, json=req_json) as resp:
                status_code = resp.status
                content = await resp.text()

        if status_code == 200:
            r = json.loads(content).get("result", {}).get("songs", []) or []
            if isinstance(r, list):
                no_salt_songs = r
                songs.extend(r)

        # Pause between the two searches without blocking the event loop
        await asyncio.sleep(0.3)

        req_json["data"]["s"] = song_name + " 管珩心"
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(cloud_function_url, json=req_json) as resp:
                status_code = resp.status
                content = await resp.text()

        if status_code == 200:
            r = json.loads(content).get("result", {}).get("songs", [])
            if isinstance(r, list):
                songs.extend(r)

        assert songs
    except Exception:
        return None

    song_name = song_name.lower().strip()
    name_matched = []
    for song in songs:
        name = song.get("name").lower().strip()
        if (
            name == song_name
            or (len(name) < len(song_name) and name in song_name)
            or (len(song_name) < len(name) and song_name in name)
        ):
            name_matched.append(song)

    filtered_songs = name_matched or no_salt_songs
    for song in filtered_songs:
        artist_names = "".join([artist.get("name").lower().strip()
                                for artist in song.get("artists", [])])
        if "管珩心" in artist_names or "hansy" in artist_names or "泡泡" in artist_names:
            return song.get("id")

    return filtered_songs[0].get("id") if filtered_songs else None
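As a side note, the two searches above could reuse a single ClientSession (and a single ClientTimeout) instead of opening a new session per request. A small sketch of that reuse, with a placeholder URL and payloads:

import asyncio

import aiohttp


async def post_many(url, payloads):
    # One session, and one timeout policy, shared by all requests
    timeout = aiohttp.ClientTimeout(total=60)
    results = []
    async with aiohttp.ClientSession(timeout=timeout) as session:
        for payload in payloads:
            async with session.post(url, json=payload) as resp:
                results.append(await resp.json())
            await asyncio.sleep(0.3)  # non-blocking pause between calls
    return results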
def start_loop(loop):
    asyncio.set_event_loop(loop)
    global OUR_SESSION
    timeout = aiohttp.ClientTimeout(sock_connect=60, sock_read=60)
    OUR_SESSION = aiohttp.ClientSession(connector=aiohttp.TCPConnector(),
                                        timeout=timeout)
    loop.run_forever()
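A usage sketch for a helper like this, assuming the start_loop function and the OUR_SESSION global defined above: the loop runs in a daemon thread and coroutines are submitted to it from the main thread with run_coroutine_threadsafe. The fetch coroutine and URL are illustrative.

import asyncio
import threading


async def fetch(url):
    # Reuses the module-level session created inside start_loop()
    async with OUR_SESSION.get(url) as resp:
        return resp.status


loop = asyncio.new_event_loop()
threading.Thread(target=start_loop, args=(loop,), daemon=True).start()

future = asyncio.run_coroutine_threadsafe(fetch('https://example.com'), loop)
print(future.result(timeout=70))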
async def p_report(self, ctx):
    blacklisted = await sql.get_blacklist(self.client.pool, ctx.author.id, ctx.guild.id, 'reporting')
    if blacklisted:
        return await ctx.author.send("You have been blacklisted from sending a report or suggestion! Contact a security+ if you believe this to be a mistake!")

    embed = discord.Embed(title="Is this report a feature or a bug?",
                          description="Select 💎 if it's a feature, 🦟 if it's a bug.\n❌ to cancel.",
                          color=discord.Color.gold())
    msg = await ctx.send(embed=embed)
    await msg.add_reaction("💎")
    await msg.add_reaction("🦟")
    await msg.add_reaction("❌")

    def check(react, usr):
        return usr.id == ctx.author.id and react.message.id == msg.id and str(react.emoji) in ["💎", "🦟", "❌"]

    try:
        reaction, user = await self.client.wait_for('reaction_add', timeout=1800, check=check)  # Wait 1/2 hr max
    except asyncio.TimeoutError:
        embed = discord.Embed(title="Timed out!", description="You didn't choose an option in time!", color=discord.Color.red())
        await msg.delete()
        return await ctx.send(embed=embed)

    if str(reaction.emoji) == '💎':
        label = 'Feature'
    elif str(reaction.emoji) == '❌':
        embed = discord.Embed(title="Cancelled!", description="You cancelled this report!", color=discord.Color.red())
        return await msg.edit(embed=embed)
    else:
        label = 'Bug'

    if label == 'Feature':
        desc = "```**Is your feature request related to a problem? Please describe.**\nA clear and concise description of what the problem is. " \
               "Ex. I'm always frustrated when [...]\n\n**How would the feature work? Describe**\nAdd a description about how the feature would work " \
               "(e.g. commands, interactions, etc)\n\n**Describe the ideal implementation.**\nA clear and concise description of what you want to happen.\n\n" \
               "**Describe alternatives you've considered**\nA clear and concise description of any alternative solutions or features you've considered.\n\n" \
               "**Additional context**\nAdd any other context or a screenshot about the feature request here.\n```"
    else:
        desc = "```**Describe the bug**\nA clear and concise description of what the bug is.\n\n**To Reproduce**\nSteps to reproduce the behavior:\n1. (list all steps)\n" \
               "**Expected behavior**\nA clear and concise description of what you expected to happen.\n\n**Screenshot**\nIf applicable, add a screenshot/image to help " \
               "explain your problem.\n\n**What server & channel did this occur in?**\nServer:\nChannel:\n```"

    embed = discord.Embed(title="Please copy the template & fill it out -- Send CANCEL to cancel.", description=desc, color=discord.Color.gold())
    await msg.clear_reactions()
    await msg.edit(embed=embed)

    while True:
        imageb = None

        def member_check(m):
            return m.author.id == ctx.author.id and m.channel == msg.channel

        try:
            issuemsg = await self.client.wait_for('message', timeout=1800, check=member_check)
        except asyncio.TimeoutError:
            embed = discord.Embed(title="Timed out!", description="You didn't write your report in time!", color=discord.Color.red())
            # Stop here if the member never sends the report text
            return await msg.edit(embed=embed)

        content = str(issuemsg.content)
        if 'cancel' in content.strip().lower():
            embed = discord.Embed(title="Cancelled!", description="You cancelled this report!", color=discord.Color.red())
            return await msg.edit(embed=embed)
        if not content:
            content = "No issue content provided."

        if issuemsg.attachments:
            imageb = issuemsg.attachments[0] if issuemsg.attachments[0].height else None
            if not imageb:
                await ctx.send("Please only send images as attachments.", delete_after=7)
                continue
            else:
                imageb = await imageb.read()
                await issuemsg.delete()
                break
        else:
            await issuemsg.delete()
            break

    if imageb:
        img_data = await utils.image_upload(imageb, ctx, is_rc=False)
        if not img_data:
            return await ctx.send(
                "There was an issue communicating with the image server, try again and if the issue persists – contact the developer.",
                delete_after=10)
        image = img_data["secure_url"]
        content += f"\n\nUploaded Image:\n{image}"

    title = '[PATREON] [FEATURE] ' if label == 'Feature' else '[PATREON] [BUG] '
    title += f'Submitted by {ctx.author.display_name}'
    header = {'Authorization': f'token {self.client.gh_token}'}
    payload = {
        "title": title,
        'body': content,
        'assignee': 'Jacobvs',
        'labels': [label]
    }

    try:
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10)) as cs:
            async with cs.request("POST", "https://api.github.com/repos/Jacobvs/Rotmg-Discord-Bot/issues",
                                  json=payload, headers=header) as r:
                if r.status != 201:
                    print("GH ISSUE UPLOAD ERROR:")
                    print(r)
                    print(await r.json())
                    return None
                else:
                    res = await r.json()
    except asyncio.TimeoutError:
        return await ctx.send("There was an issue uploading the issue, please retry the command.", delete_after=10)

    embed = discord.Embed(title="Thank You!",
                          description="I (Darkmattr) appreciate that you took the time to fill out a report/suggestion!\nI've been notified & will get to "
                                      f"it as soon as possible.\n\nTrack the status of your issue here:\n{res['html_url']}",
                          color=discord.Color.green())
    await msg.edit(embed=embed)
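The GitHub upload step can also be illustrated in isolation as a small sketch: a short ClientTimeout, the issues endpoint used above, and explicit handling of both non-201 responses and timeouts. create_issue, the token argument and the owner/repo placeholder are illustrative.

import asyncio

import aiohttp


async def create_issue(title, body, token, repo='owner/repo'):
    headers = {'Authorization': f'token {token}'}
    payload = {'title': title, 'body': body}
    try:
        async with aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=10)) as cs:
            async with cs.post(f'https://api.github.com/repos/{repo}/issues',
                               json=payload, headers=headers) as r:
                if r.status != 201:
                    return None
                # A successful creation returns the new issue, including its URL
                return (await r.json())['html_url']
    except (aiohttp.ClientError, asyncio.TimeoutError):
        return None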