def test_all_bot_args_custom(self, builder, bot, monkeypatch):
    defaults = Defaults()
    request = HTTPXRequest()
    get_updates_request = HTTPXRequest()
    builder.token(bot.token).base_url("base_url").base_file_url("base_file_url").private_key(
        PRIVATE_KEY
    ).defaults(defaults).arbitrary_callback_data(42).request(request).get_updates_request(
        get_updates_request
    )
    built_bot = builder.build().bot

    # In the following we access some private attributes of bot and request. This is not
    # really nice, as we want to test the public interface, but here it's hard to ensure by
    # other means that the parameters are passed correctly.
    assert built_bot.token == bot.token
    assert built_bot.base_url == "base_url" + bot.token
    assert built_bot.base_file_url == "base_file_url" + bot.token
    assert built_bot.defaults is defaults
    assert built_bot.request is request
    assert built_bot._request[0] is get_updates_request
    assert built_bot.callback_data_cache.maxsize == 42
    assert built_bot.private_key

    @dataclass
    class Client:
        timeout: object
        proxies: object
        limits: object

    monkeypatch.setattr(httpx, "AsyncClient", Client)

    builder = ApplicationBuilder().token(bot.token)
    builder.connection_pool_size(1).connect_timeout(2).pool_timeout(3).read_timeout(
        4
    ).write_timeout(5).proxy_url("proxy_url")
    app = builder.build()
    client = app.bot.request._client

    assert client.timeout == httpx.Timeout(pool=3, connect=2, read=4, write=5)
    assert client.limits == httpx.Limits(max_connections=1, max_keepalive_connections=1)
    assert client.proxies == "proxy_url"

    builder = ApplicationBuilder().token(bot.token)
    builder.get_updates_connection_pool_size(1).get_updates_connect_timeout(
        2
    ).get_updates_pool_timeout(3).get_updates_read_timeout(4).get_updates_write_timeout(
        5
    ).get_updates_proxy_url("proxy_url")
    app = builder.build()
    client = app.bot._request[0]._client

    assert client.timeout == httpx.Timeout(pool=3, connect=2, read=4, write=5)
    assert client.limits == httpx.Limits(max_connections=1, max_keepalive_connections=1)
    assert client.proxies == "proxy_url"
def test_init(self, monkeypatch):
    @dataclass
    class Client:
        timeout: object
        proxies: object
        limits: object

    monkeypatch.setattr(httpx, "AsyncClient", Client)

    request = HTTPXRequest()
    assert request._client.timeout == httpx.Timeout(connect=5.0, read=5.0, write=5.0, pool=1.0)
    assert request._client.proxies is None
    assert request._client.limits == httpx.Limits(
        max_connections=1, max_keepalive_connections=1
    )

    request = HTTPXRequest(
        connection_pool_size=42,
        proxy_url="proxy_url",
        connect_timeout=43,
        read_timeout=44,
        write_timeout=45,
        pool_timeout=46,
    )
    assert request._client.proxies == "proxy_url"
    assert request._client.limits == httpx.Limits(
        max_connections=42, max_keepalive_connections=42
    )
    assert request._client.timeout == httpx.Timeout(connect=43, read=44, write=45, pool=46)
def test_default_values(self, bot, monkeypatch, builder):
    @dataclass
    class Client:
        timeout: object
        proxies: object
        limits: object

    monkeypatch.setattr(httpx, "AsyncClient", Client)

    app = builder.token(bot.token).build()

    assert isinstance(app, Application)
    assert app.concurrent_updates == 0
    assert isinstance(app.bot, ExtBot)
    assert isinstance(app.bot.request, HTTPXRequest)
    assert "api.telegram.org" in app.bot.base_url
    assert bot.token in app.bot.base_url
    assert "api.telegram.org" in app.bot.base_file_url
    assert bot.token in app.bot.base_file_url
    assert app.bot.private_key is None
    assert app.bot.arbitrary_callback_data is False
    assert app.bot.defaults is None

    get_updates_client = app.bot._request[0]._client
    assert get_updates_client.limits == httpx.Limits(
        max_connections=1, max_keepalive_connections=1)
    assert get_updates_client.proxies is None
    assert get_updates_client.timeout == httpx.Timeout(connect=5.0, read=5.0, write=5.0, pool=1.0)

    client = app.bot.request._client
    assert client.limits == httpx.Limits(max_connections=256, max_keepalive_connections=256)
    assert client.proxies is None
    assert client.timeout == httpx.Timeout(connect=5.0, read=5.0, write=5.0, pool=1.0)

    assert isinstance(app.update_queue, asyncio.Queue)
    assert isinstance(app.updater, Updater)
    assert app.updater.bot is app.bot
    assert app.updater.update_queue is app.update_queue

    assert isinstance(app.job_queue, JobQueue)
    assert app.job_queue.application is app
    assert app.persistence is None
    assert app.post_init is None
    assert app.post_shutdown is None
def __init__(
    self,
    limit: int = 30,
    max_connections: int = 100,
    timeout: Optional[float] = 20,
    env: bool = False,
    internal: Optional[bool] = False,
    proxies: Optional[str] = None,
):
    """
    :param limit: maximum number of keep-alive connections
    :param max_connections: maximum number of connections
    :param timeout: request timeout in seconds
    :param env: trust environment variables; debug output: HTTPX_LOG_LEVEL=debug
    :param internal:
    :param proxies: proxy URL passed to the client
    """
    self.internal: Optional[bool] = internal
    self.client: httpx.AsyncClient = httpx.AsyncClient(
        verify=False,
        timeout=httpx.Timeout(timeout, connect=60),
        proxies=proxies,  # type: ignore
        limits=httpx.Limits(
            max_keepalive_connections=limit, max_connections=max_connections
        ),
        trust_env=env,
        follow_redirects=True,
        event_hooks={"response": [raise_on_4xx_5xx]},
    )
async def send_request(
    self, url: str, method: str = "get", **kwargs: Any
) -> httpx.Response:
    if self._client is None:
        self._client = httpx.AsyncClient(
            http2=True,
            limits=httpx.Limits(keepalive_expiry=KEEPALIVE_EXPIRY),
        )

    LOGGER.debug(f"-> {method.upper()} {url} {kwargs}")

    r = await self._client.request(
        method, url, headers=AQUALINK_HTTP_HEADERS, **kwargs
    )

    LOGGER.debug(f"<- {r.status_code} {r.reason_phrase} - {url}")

    if r.status_code == 401:
        m = "Unauthorized Access, check your credentials and try again"
        self._logged = False
        # Include the message in the exception so it isn't silently discarded.
        raise AqualinkServiceUnauthorizedException(m)

    if r.status_code != 200:
        m = f"Unexpected response: {r.status_code} {r.reason_phrase}"
        raise AqualinkServiceException(m)

    return r
async def get_html(self, name, proxy):
    # proxy = proxy.replace("http://", "")
    proxies = {
        "http://": "http://{proxy}".format(proxy=proxy),
        "https://": "http://{proxy}".format(proxy=proxy),
    }
    # max_keepalive_connections: number of keep-alive connections allowed, or None to always allow (default 10)
    # max_connections: maximum number of connections allowed, or None for no limit (default 100)
    limits = httpx.Limits(
        max_keepalive_connections=self.max_keepalive_connections,
        max_connections=self.max_connections,
    )
    try:
        async with httpx.AsyncClient(
            limits=limits, proxies=proxies, timeout=self.time_out, verify=False
        ) as client:
            resp = await client.get(self.test_url)
            assert resp.status_code == 200
            if self.redis.set(proxy, proxy):
                logger.info(f"{proxy}, validation succeeded")
            else:
                self.redis.remove(proxy)
                logger.error(f"{proxy}, validation failed, proxy unusable")
    except Exception as err:
        # Don't touch `resp` here: it is unbound if the request itself failed.
        self.redis.remove(proxy)
        logger.error(f"{proxy}, err: {err} validation failed, proxy unusable")
        return
    finally:
        self.num += 1
def __init__(
    self,
    connection_pool_size: int = 1,
    proxy_url: str = None,
    read_timeout: Optional[float] = 5.0,
    write_timeout: Optional[float] = 5.0,
    connect_timeout: Optional[float] = 5.0,
    pool_timeout: Optional[float] = 1.0,
):
    timeout = httpx.Timeout(
        connect=connect_timeout,
        read=read_timeout,
        write=write_timeout,
        pool=pool_timeout,
    )
    limits = httpx.Limits(
        max_connections=connection_pool_size,
        max_keepalive_connections=connection_pool_size,
    )

    self._client_kwargs = dict(
        timeout=timeout,
        proxies=proxy_url,
        limits=limits,
    )
    self._client = self._build_client()
async def query_items(collection, url, params, max_sync_queries=10,
                      item_template="${year}/${month}/${day}"):
    found = hits(url, params)
    limit = params['limit']
    semaphore = asyncio.Semaphore(max_sync_queries)
    total_pages = math.ceil(found / limit)
    logger.info(f"Found {found} items ({total_pages} pages)")
    queries = []
    transport = httpx.AsyncHTTPTransport(retries=3)
    limits = httpx.Limits(max_keepalive_connections=None, max_connections=5000)
    async with httpx.AsyncClient(timeout=None, limits=limits, transport=transport) as client:
        for p in range(1, total_pages + 1):
            _params = {'page': p}
            _params.update(params)
            queries.append(
                query_items_page(collection, url, client, semaphore,
                                 params=_params, item_template=item_template))
        return await asyncio.gather(*queries)
def __init__(self, ssl=True, verify=True, proxy_url=None, auth=None, limits=None):
    if proxy_url and 'crawlera.com' in proxy_url:
        assert ssl is False
    base_headers = None
    if proxy_url:
        base_headers = {'User-Agent': random.choice(USER_AGENTS)}
    if limits is None:
        limits = httpx.Limits(max_connections=100, max_keepalive_connections=20)
    # Apply the connection limits to the client.
    self.client = httpx.AsyncClient(headers=base_headers, verify=ssl, auth=auth,
                                    proxies=proxy_url, timeout=40, limits=limits)
    self.request_lock = None
    self.proxy_url = proxy_url
    self.limit_remaining_by_url_prefix = defaultdict(list)
    self.response_statuses = defaultdict(list)
def create_http_client() -> httpx.AsyncClient:
    timeout = httpx.Timeout(read=300, pool=200)
    pool_limits = httpx.Limits(
        max_connections=10,
        max_keepalive_connections=GNS3_CONTROLLER_NUM_MAX_CONN)
    client = httpx.AsyncClient(limits=pool_limits, timeout=timeout)
    return client
async def _get_all_return_codes(
    urls,
    timeout: float,
    max_connections: int,
    max_keepalive_connections: int,
    follow_codes: list[int],
    is_allowed: Callable | None = None,
):
    # return await asyncio.gather(*map(_get_return_code, urls))
    ret = []
    limits = httpx.Limits(
        max_keepalive_connections=max_keepalive_connections,
        max_connections=max_connections,
    )
    async with httpx.AsyncClient(limits=limits) as client:
        tasks = map(
            lambda x: _get_return_code(
                x, client, timeout, follow_codes=follow_codes, is_allowed=is_allowed
            ),
            urls,
        )
        for task in track(
            asyncio.as_completed(tasks), description="Checking...", total=len(urls)
        ):
            ret.append(await task)
    return ret
def new_client(enable_http, verify, enable_http2,
               max_connections, max_keepalive_connections, keepalive_expiry,
               proxies, local_address, retries, max_redirects):
    limit = httpx.Limits(max_connections=max_connections,
                         max_keepalive_connections=max_keepalive_connections,
                         keepalive_expiry=keepalive_expiry)
    # See https://www.python-httpx.org/advanced/#routing
    mounts = {}
    for pattern, proxy_url in iter_proxies(proxies):
        if not enable_http and (pattern == 'http' or pattern.startswith('http://')):
            continue
        if proxy_url.startswith('socks4://') \
           or proxy_url.startswith('socks5://') \
           or proxy_url.startswith('socks5h://'):
            mounts[pattern] = get_transport_for_socks_proxy(
                verify, enable_http2, local_address, proxy_url, limit, retries)
        else:
            mounts[pattern] = get_transport(
                verify, enable_http2, local_address, proxy_url, limit, retries)

    if not enable_http:
        mounts['http://'] = AsyncHTTPTransportNoHttp()

    transport = get_transport(verify, enable_http2, local_address, None, limit, retries)
    return httpx.AsyncClient(transport=transport, mounts=mounts, max_redirects=max_redirects)
async def collect_service_info() -> Iterable[Tuple[str, httpx.Response]]:
    max_connections = 2
    async with httpx.AsyncClient(
        http2=True,
        limits=httpx.Limits(max_connections=max_connections),
        timeout=10.0,
    ) as client:
        r = await client.get(
            urllib.parse.urljoin(
                BASE_URL,
                "reference_policies_actions-resources-contextkeys.html",
            ))
        parsed_html = BeautifulSoup(r.text, features="lxml")
        service_links: List[str] = []
        for link in parsed_html.body.find_all("a"):
            href = link.attrs["href"]
            if href.startswith("./list_") and href.endswith(".html"):
                service_links.append(r.url.join(href))

        # This doesn't work at the moment,
        # see https://github.com/encode/httpx/issues/1171
        #
        # service_page_responses = await asyncio.gather(
        #     *[client.get(link) for link in service_links]
        # )
        #
        # workaround
        service_page_responses = []
        for start in range(0, len(service_links), max_connections):
            service_page_responses += await asyncio.gather(*[
                client.get(link)
                for link in service_links[start:start + max_connections]
            ])

        return zip(service_links, service_page_responses)
async def test_pool_timeout(server):
    limits = httpx.Limits(max_connections=1)
    timeout = httpx.Timeout(None, pool=1e-4)

    async with httpx.AsyncClient(limits=limits, timeout=timeout) as client:
        async with client.stream("GET", server.url):
            with pytest.raises(httpx.PoolTimeout):
                await client.get("http://localhost:8000/")
def get_session(self) -> httpx.AsyncClient:
    """Return client for making asynchronous requests."""
    limits = httpx.Limits(
        max_keepalive_connections=self.KEEPALIVE_CONNECTIONS,
        max_connections=self.MAX_CONNECTIONS)
    session = httpx.AsyncClient(limits=limits, timeout=self.TIMEOUT)
    return session
def test_pool_limits_deprecated():
    limits = httpx.Limits()

    with pytest.warns(DeprecationWarning):
        httpx.Client(pool_limits=limits)

    with pytest.warns(DeprecationWarning):
        httpx.AsyncClient(pool_limits=limits)
def __init__(self, url, credentials=None, *args, **kwargs):
    limits = httpx.Limits(max_keepalive_connections=MAX_CONNECTIONS // 2,
                          max_connections=MAX_CONNECTIONS)
    super().__init__(base_url=url, limits=limits, *args, **kwargs)
    self._credentials = None
    if credentials:
        name, password = credentials
        self._credentials = {'name': name, 'password': password}
def _real_extract(self, url):
    self.report_extraction(url)
    try:
        _timeout = httpx.Timeout(15, connect=15)
        _limits = httpx.Limits(max_keepalive_connections=None, max_connections=None)
        client = httpx.Client(
            timeout=_timeout,
            limits=_limits,
            headers=self.get_param('http_headers'),
            follow_redirects=True,
            verify=(not self._downloader.params.get('nocheckcertificate')))

        res = client.get(url)
        webpage = re.sub('[\t\n]', '', html.unescape(res.text))

        mobj = re.findall(r'og:title\" content\=\"([^\"]+)\"', webpage)
        title = mobj[0] if mobj else url.split("/")[-1]

        mobj = re.findall(r'postid-(\d+)', webpage)
        mobj2 = re.findall(
            r"shortlink\' href\=\'https://(?:xvids\.gq|gay-tubes\.cc)/\?p\=(\d+)\'",
            webpage)
        videoid = mobj[0] if mobj else mobj2[0] if mobj2 else "video_id"

        mobj = re.findall(r'contentURL\" content="([^\"]+)\"', webpage)
        real_url = mobj[0] if mobj else ""
        if not real_url:
            raise ExtractorError("Can't find real URL")

        _info_video = self._get_info_video(real_url, client)

        format_video = {
            'format_id': 'http-mp4',
            'url': _info_video.get('url'),
            'filesize': _info_video.get('filesize'),
            'ext': 'mp4'
        }

        entry_video = {
            'id': videoid,
            'title': sanitize_filename(title, restricted=True),
            'formats': [format_video],
            'ext': 'mp4'
        }
    except Exception as e:
        self.to_screen(e)
        raise
    finally:
        client.close()

    return entry_video
def _real_extract(self, url):
    self.report_extraction(url)
    try:
        _timeout = httpx.Timeout(15, connect=15)
        _limits = httpx.Limits(max_keepalive_connections=None, max_connections=None)
        client = httpx.Client(
            timeout=_timeout,
            limits=_limits,
            headers=self.get_param('http_headers'),
            follow_redirects=True,
            verify=(not self._downloader.params.get('nocheckcertificate')))

        res = client.get(url)
        if res.status_code >= 400:
            raise ExtractorError("Page not found")

        webpage = re.sub('[\t\n]', '', html.unescape(res.text))

        mobj = re.findall(r'gallery-data" type="text/json">([^\<]+)<', webpage)
        if mobj:
            _player_info = json.loads(mobj[0])
            _url = _player_info.get('videos', {}).get('mp4', {})
            _info_video = self._get_info_video(_url, client)

            format_video = {
                'format_id': 'http-mp4',
                'url': _info_video.get('url'),
                'filesize': _info_video.get('filesize'),
                'ext': 'mp4'
            }

            return ({
                'id': str(_player_info.get('id')),
                'title': sanitize_filename(_player_info.get('title'), restricted=True),
                'formats': [format_video],
                'ext': 'mp4'
            })
        else:
            raise ExtractorError("No video details found")
    except ExtractorError as e:
        raise
    except Exception as e:
        lines = traceback.format_exception(*sys.exc_info())
        self.to_screen(f"{repr(e)}\n{'!!'.join(lines)}")
        raise ExtractorError(repr(e))
    finally:
        client.close()
async def creat_tasks_status_ok(urls, limit, timeout):
    limits = httpx.Limits(max_keepalive_connections=limit, max_connections=limit)
    async with httpx.AsyncClient(limits=limits, timeout=None) as client:
        tasks = [
            asyncio.create_task(_is_response_staus_ok(client, url, timeout))
            for url in urls
        ]
        status_ok = await asyncio.gather(*tasks)
    return status_ok
def get_client() -> httpx.AsyncClient:
    """Returns a httpx client that can be used with get_updates()."""
    transport = httpx.AsyncHTTPTransport(
        limits=httpx.Limits(max_keepalive_connections=5),
        retries=5,
    )
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'User-Agent': 'packet-windoze',
    }
    return httpx.AsyncClient(headers=headers, timeout=30, transport=transport)
def start_client(self, msg=None):
    if msg:
        self.logger.info(f"{msg} client init")
    client = httpx.Client(
        timeout=httpx.Timeout(20, connect=60),
        limits=httpx.Limits(max_keepalive_connections=None, max_connections=None),
        headers={'user-agent': self.useragent})
    client.get("https://readcomiconline.li")
    client.cookies.set("rco_quality", "hq", "readcomiconline.li")
    client.cookies.set("rco_readType", "1", "readcomiconline.li")
    time.sleep(1)
    client.get("https://readcomiconline.li")
    time.sleep(1)
    return client
async def main():
    responses = []
    urls = get_api_urls(num=2000)

    limits = httpx.Limits(max_keepalive_connections=20, max_connections=80)  # 104
    # limits = httpx.Limits(max_keepalive_connections=10, max_connections=20)  # 104
    # limits = httpx.Limits(max_keepalive_connections=5, max_connections=10)  # 102

    s = time.perf_counter()
    async with httpx.AsyncClient(limits=limits, timeout=20.0) as client:
        responses = await fetch(client, urls)
    elapsed = time.perf_counter() - s

    print(f"elapsed time: {elapsed}")
    requests_per_second = len(urls) / elapsed
    print(f"requests/s: {requests_per_second}")
def _client(self):
    if self._client_storage is None:
        logger.debug('Creating a new client instance.')
        limits = httpx.Limits(max_connections=1, max_keepalive_connections=0)
        self._client_storage = httpx.Client(
            auth=self._authenticate_request,
            verify=self._root_cert_path,
            http2=True,
            timeout=10.0,
            limits=limits,
            base_url=self._base_url)
    return self._client_storage
def launch():
    # Run the above function and store its results in a variable.
    full_file_paths = get_filepaths(".")
    parsed = []
    limits = httpx.Limits(max_keepalive_connections=5, max_connections=5)
    client = httpx.Client(verify=False, limits=limits)
    for file in full_file_paths:
        return_val = submit_to_blackd(file, client)
        if return_val:
            parsed.append(return_val)
    if len(parsed) != len(full_file_paths):
        print(
            f"Not parsed everything, {len(full_file_paths) - len(parsed)} remaining"
        )
def test_custom_httpx_config():
    """Test the SchemaRegistryClient creation with custom httpx config"""
    timeout = httpx.Timeout(10.0, connect=60.0)
    pool_limits = httpx.Limits(max_keepalive_connections=5, max_connections=10)

    client = SchemaRegistryClient(
        url="https://127.0.0.1:65534",
        timeout=timeout,
        pool_limits=pool_limits,
    )

    assert client.timeout == timeout
    assert client.pool_limits == pool_limits
async def process_urls(headers, username, model_id, urls):
    if urls:
        operations.create_database(model_id)
        media_ids = operations.get_media_ids(model_id)
        separated_urls = separate_by_id(urls, media_ids)

        path = pathlib.Path.cwd() / username
        path.mkdir(exist_ok=True)

        # Added pool limit:
        limits = httpx.Limits(max_connections=8, max_keepalive_connections=5)

        async with httpx.AsyncClient(headers=headers, limits=limits, timeout=None) as c:
            add_cookies(c)

            aws = [asyncio.create_task(
                download(c, path, model_id, *url)) for url in separated_urls]

            photo_count = 0
            video_count = 0
            total_bytes_downloaded = 0
            data = 0
            desc = 'Progress: ({p_count} photos, {v_count} videos || {data})'

            with tqdm(desc=desc.format(p_count=photo_count, v_count=video_count, data=data),
                      total=len(aws), colour='cyan', leave=True) as main_bar:
                for coro in asyncio.as_completed(aws):
                    try:
                        media_type, num_bytes_downloaded = await coro
                    except Exception as e:
                        print(e)
                        # Skip failed downloads; their results are undefined.
                        continue

                    total_bytes_downloaded += num_bytes_downloaded
                    data = convert_num_bytes(total_bytes_downloaded)

                    if media_type == 'photo':
                        photo_count += 1
                        main_bar.set_description(
                            desc.format(
                                p_count=photo_count, v_count=video_count, data=data),
                            refresh=False)
                    elif media_type == 'video':
                        video_count += 1
                        main_bar.set_description(
                            desc.format(
                                p_count=photo_count, v_count=video_count, data=data),
                            refresh=False)

                    main_bar.update()
async def measure_server(self, epoch):
    print("measure server")
    print(epoch.urls[0])
    max_connections = min(epoch.number_of_connections, 50)
    print("max_connections: ", max_connections)
    # max_connections = 10
    limits = httpx.Limits(max_keepalive_connections=10, max_connections=max_connections)
    timeout = httpx.Timeout(30.0, connect=60.0)
    start = time.perf_counter()
    async with httpx.AsyncClient(limits=limits, timeout=timeout) as client:
        responses = await asyncio.gather(
            *[client.get(url) for url in epoch.urls])
    elapsed = time.perf_counter() - start
    print("done: ", elapsed)
    print("responses status: ", responses[0].status_code)
    return elapsed, responses
class PortalImageDownloader(object):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'
    }
    limits = httpx.Limits(max_keepalive_connections=20, max_connections=40)

    def __init__(self, dirname):
        self.img_dir = Path(dirname)
        self.img_dir.mkdir(parents=True, exist_ok=True)

    async def __download_img(self, client, portal):
        img_path = self.img_dir.joinpath(f"{portal['lng']}_{portal['lat']}.jpg")
        if img_path.exists():
            return True
        try:
            async with client.stream('GET', portal['url']) as r:
                if r.status_code == httpx.codes.OK:
                    async with aiofiles.open(str(img_path), 'wb') as f:
                        async for chunk in r.aiter_bytes():
                            await f.write(chunk)
                    return True
                else:
                    return False
        except:
            await asyncio.to_thread(partial(img_path.unlink, missing_ok=True))
            return False

    async def download_from_csv(self, portals_csv):
        portals_list = await asyncio.to_thread(PortalsCSV.read_csv, portals_csv)
        async with httpx.AsyncClient(headers=self.headers, limits=self.limits) as client:
            print(f'[!] Downloading Portal images listed in ({portals_csv})...')
            tasks = [
                asyncio.create_task(self.__download_img(client, portal))
                for portal in portals_list
            ]
            unfinished = [
                n for n, coro in enumerate(tqdm.as_completed(tasks), 1)
                if await coro is False
            ]
            print('[!] Image download finished.')
            if any(unfinished):
                print(f"[!] {len(unfinished)} images failed to download; "
                      f"please run the download again to fetch the remaining images.")
async def run(count: int = 1, batch: int = 1) -> None:
    sema = asyncio.Semaphore(batch)

    async def get(s: httpx.AsyncClient) -> bool:
        async with sema:
            resp = await s.get(URL)
            return resp.status_code == 200 and resp.text == 'OK\n'

    fails = 0
    limits = httpx.Limits(max_keepalive_connections=batch, max_connections=batch)
    async with httpx.AsyncClient(limits=limits) as s:
        tasks = [get(s) for _ in range(count)]
        for task in asyncio.as_completed(tasks):
            fails += not await task
    assert fails == 0