Example #1
    def test_all_bot_args_custom(self, builder, bot, monkeypatch):
        defaults = Defaults()
        request = HTTPXRequest()
        get_updates_request = HTTPXRequest()
        builder.token(bot.token).base_url("base_url").base_file_url(
            "base_file_url").private_key(PRIVATE_KEY).defaults(
                defaults).arbitrary_callback_data(42).request(
                    request).get_updates_request(get_updates_request)
        built_bot = builder.build().bot

        # In the following we access some private attributes of bot and request. This is not
        # ideal, since we want to test the public interface, but here it is hard to ensure by
        # other means that the parameters are passed through correctly.

        assert built_bot.token == bot.token
        assert built_bot.base_url == "base_url" + bot.token
        assert built_bot.base_file_url == "base_file_url" + bot.token
        assert built_bot.defaults is defaults
        assert built_bot.request is request
        assert built_bot._request[0] is get_updates_request
        assert built_bot.callback_data_cache.maxsize == 42
        assert built_bot.private_key

        @dataclass
        class Client:
            timeout: object
            proxies: object
            limits: object

        monkeypatch.setattr(httpx, "AsyncClient", Client)

        builder = ApplicationBuilder().token(bot.token)
        builder.connection_pool_size(1).connect_timeout(2).pool_timeout(
            3).read_timeout(4).write_timeout(5).proxy_url("proxy_url")
        app = builder.build()
        client = app.bot.request._client

        assert client.timeout == httpx.Timeout(pool=3,
                                               connect=2,
                                               read=4,
                                               write=5)
        assert client.limits == httpx.Limits(max_connections=1,
                                             max_keepalive_connections=1)
        assert client.proxies == "proxy_url"

        builder = ApplicationBuilder().token(bot.token)
        builder.get_updates_connection_pool_size(
            1).get_updates_connect_timeout(2).get_updates_pool_timeout(
                3).get_updates_read_timeout(4).get_updates_write_timeout(
                    5).get_updates_proxy_url("proxy_url")
        app = builder.build()
        client = app.bot._request[0]._client

        assert client.timeout == httpx.Timeout(pool=3,
                                               connect=2,
                                               read=4,
                                               write=5)
        assert client.limits == httpx.Limits(max_connections=1,
                                             max_keepalive_connections=1)
        assert client.proxies == "proxy_url"
Example #2
    def test_init(self, monkeypatch):
        @dataclass
        class Client:
            timeout: object
            proxies: object
            limits: object

        monkeypatch.setattr(httpx, "AsyncClient", Client)

        request = HTTPXRequest()
        assert request._client.timeout == httpx.Timeout(connect=5.0, read=5.0, write=5.0, pool=1.0)
        assert request._client.proxies is None
        assert request._client.limits == httpx.Limits(
            max_connections=1, max_keepalive_connections=1
        )

        request = HTTPXRequest(
            connection_pool_size=42,
            proxy_url="proxy_url",
            connect_timeout=43,
            read_timeout=44,
            write_timeout=45,
            pool_timeout=46,
        )
        assert request._client.proxies == "proxy_url"
        assert request._client.limits == httpx.Limits(
            max_connections=42, max_keepalive_connections=42
        )
        assert request._client.timeout == httpx.Timeout(connect=43, read=44, write=45, pool=46)
Example #3
    def test_default_values(self, bot, monkeypatch, builder):
        @dataclass
        class Client:
            timeout: object
            proxies: object
            limits: object

        monkeypatch.setattr(httpx, "AsyncClient", Client)

        app = builder.token(bot.token).build()

        assert isinstance(app, Application)
        assert app.concurrent_updates == 0

        assert isinstance(app.bot, ExtBot)
        assert isinstance(app.bot.request, HTTPXRequest)
        assert "api.telegram.org" in app.bot.base_url
        assert bot.token in app.bot.base_url
        assert "api.telegram.org" in app.bot.base_file_url
        assert bot.token in app.bot.base_file_url
        assert app.bot.private_key is None
        assert app.bot.arbitrary_callback_data is False
        assert app.bot.defaults is None

        get_updates_client = app.bot._request[0]._client
        assert get_updates_client.limits == httpx.Limits(
            max_connections=1, max_keepalive_connections=1)
        assert get_updates_client.proxies is None
        assert get_updates_client.timeout == httpx.Timeout(connect=5.0,
                                                           read=5.0,
                                                           write=5.0,
                                                           pool=1.0)

        client = app.bot.request._client
        assert client.limits == httpx.Limits(max_connections=256,
                                             max_keepalive_connections=256)
        assert client.proxies is None
        assert client.timeout == httpx.Timeout(connect=5.0,
                                               read=5.0,
                                               write=5.0,
                                               pool=1.0)

        assert isinstance(app.update_queue, asyncio.Queue)
        assert isinstance(app.updater, Updater)
        assert app.updater.bot is app.bot
        assert app.updater.update_queue is app.update_queue

        assert isinstance(app.job_queue, JobQueue)
        assert app.job_queue.application is app

        assert app.persistence is None
        assert app.post_init is None
        assert app.post_shutdown is None
Example #4
    def __init__(
        self,
        limit: int = 30,
        max_connections: int = 100,
        timeout: Optional[float] = 20,
        env: bool = False,
        internal: Optional[bool] = False,
        proxies: Optional[str] = None,
    ):
        """

        :param limit:
        :param max_connections:
        :param timeout:
        :param env:  debug输出:HTTPX_LOG_LEVEL=debug
        :param internal:
        :param proxies:
        """
        self.internal: Optional[int] = internal
        self.client: httpx.AsyncClient = httpx.AsyncClient(
            verify=False,
            timeout=httpx.Timeout(timeout, connect=60),
            proxies=proxies,  # type: ignore
            limits=httpx.Limits(
                max_keepalive_connections=limit, max_connections=max_connections
            ),
            trust_env=env,
            follow_redirects=True,
            event_hooks={"response": [raise_on_4xx_5xx]},
        )
Example #5
    async def send_request(
        self, url: str, method: str = "get", **kwargs: Any
    ) -> httpx.Response:
        if self._client is None:
            self._client = httpx.AsyncClient(
                http2=True,
                limits=httpx.Limits(keepalive_expiry=KEEPALIVE_EXPIRY),
            )

        LOGGER.debug(f"-> {method.upper()} {url} {kwargs}")
        r = await self._client.request(
            method, url, headers=AQUALINK_HTTP_HEADERS, **kwargs
        )

        LOGGER.debug(f"<- {r.status_code} {r.reason_phrase} - {url}")

        if r.status_code == 401:
            m = "Unauthorized Access, check your credentials and try again"
            self._logged = False
            raise AqualinkServiceUnauthorizedException(m)

        if r.status_code != 200:
            m = f"Unexpected response: {r.status_code} {r.reason_phrase}"
            raise AqualinkServiceException(m)

        return r
Example #6
    async def get_html(self, name, proxy):
        # proxy = proxy.replace("http://", "")
        proxies = {
            "http://": "http://{proxy}".format(proxy=proxy),
            "https://": "http://{proxy}".format(proxy=proxy),
        }
        # max_keepalive_connections: number of keep-alive connections allowed, or None to always allow (default 10).
        # max_connections: maximum number of connections allowed, or None for unlimited (default 100).
        limits = httpx.Limits(max_keepalive_connections=self.max_keepalive_connections,
                              max_connections=self.max_connections)
        try:
            async with httpx.AsyncClient(limits=limits, proxies=proxies,
                                         timeout=self.time_out, verify=False) as client:
                resp = await client.get(self.test_url)
                assert resp.status_code == 200
                if self.redis.set(proxy, proxy):
                    logger.info(f"{proxy}, validation succeeded")
                else:
                    self.redis.remove(proxy)
                    logger.error(f"{proxy}, validation failed, unusable proxy")
        except Exception as err:
            # `resp` may be unbound here if the request itself failed, so only log the error.
            self.redis.remove(proxy)
            logger.error(f"{proxy}, err : {err}  validation failed, unusable proxy")
            return
        finally:
            self.num += 1
Example #7
    def __init__(
        self,
        connection_pool_size: int = 1,
        proxy_url: Optional[str] = None,
        read_timeout: Optional[float] = 5.0,
        write_timeout: Optional[float] = 5.0,
        connect_timeout: Optional[float] = 5.0,
        pool_timeout: Optional[float] = 1.0,
    ):
        timeout = httpx.Timeout(
            connect=connect_timeout,
            read=read_timeout,
            write=write_timeout,
            pool=pool_timeout,
        )
        limits = httpx.Limits(
            max_connections=connection_pool_size,
            max_keepalive_connections=connection_pool_size,
        )
        self._client_kwargs = dict(
            timeout=timeout,
            proxies=proxy_url,
            limits=limits,
        )

        self._client = self._build_client()
Example #8
async def query_items(collection,
                      url,
                      params,
                      max_sync_queries=10,
                      item_template="${year}/${month}/${day}"):
    found = hits(url, params)
    limit = params['limit']
    semaphore = asyncio.Semaphore(max_sync_queries)

    total_pages = math.ceil(found / limit)
    logger.info(f"Found {found} items ({total_pages} pages)")

    queries = []
    transport = httpx.AsyncHTTPTransport(retries=3)
    limits = httpx.Limits(max_keepalive_connections=None, max_connections=5000)
    async with httpx.AsyncClient(timeout=None,
                                 limits=limits,
                                 transport=transport) as client:
        for p in range(1, total_pages + 1):
            _params = {'page': p}
            _params.update(params)
            queries.append(
                query_items_page(collection,
                                 url,
                                 client,
                                 semaphore,
                                 params=_params,
                                 item_template=item_template))
        return await asyncio.gather(*queries)
Example #9
    def __init__(self,
                 ssl=True,
                 verify=True,
                 proxy_url=None,
                 auth=None,
                 limits=None):

        if proxy_url and 'crawlera.com' in proxy_url:
            assert ssl is False

        base_headers = None
        if proxy_url:
            base_headers = {'User-Agent': random.choice(USER_AGENTS)}

        if limits is None:
            limits = httpx.Limits(max_connections=100,
                                  max_keepalive_connections=20)

        self.client = httpx.AsyncClient(headers=base_headers,
                                        verify=ssl,
                                        auth=auth,
                                        limits=limits,
                                        proxies=proxy_url,
                                        timeout=40)
        self.request_lock = None
        self.proxy_url = proxy_url

        self.limit_remaining_by_url_prefix = defaultdict(list)
        self.response_statuses = defaultdict(list)
Example #10
def create_http_client() -> httpx.AsyncClient:
    # httpx.Timeout requires a default unless all four timeouts are given;
    # pass None so that only read and pool are bounded.
    timeout = httpx.Timeout(None, read=300, pool=200)
    limits = httpx.Limits(
        max_connections=10,
        max_keepalive_connections=GNS3_CONTROLLER_NUM_MAX_CONN)
    client = httpx.AsyncClient(limits=limits, timeout=timeout)
    return client
Example #11
async def _get_all_return_codes(
    urls,
    timeout: float,
    max_connections: int,
    max_keepalive_connections: int,
    follow_codes: list[int],
    is_allowed: Callable | None = None,
):
    # return await asyncio.gather(*map(_get_return_code, urls))
    ret = []
    limits = httpx.Limits(
        max_keepalive_connections=max_keepalive_connections,
        max_connections=max_connections,
    )
    async with httpx.AsyncClient(limits=limits) as client:
        tasks = map(
            lambda x: _get_return_code(
                x, client, timeout, follow_codes=follow_codes, is_allowed=is_allowed
            ),
            urls,
        )
        for task in track(
            asyncio.as_completed(tasks), description="Checking...", total=len(urls)
        ):
            ret.append(await task)

    return ret
Example #12
def new_client(enable_http, verify, enable_http2, max_connections,
               max_keepalive_connections, keepalive_expiry, proxies,
               local_address, retries, max_redirects):
    limit = httpx.Limits(max_connections=max_connections,
                         max_keepalive_connections=max_keepalive_connections,
                         keepalive_expiry=keepalive_expiry)
    # See https://www.python-httpx.org/advanced/#routing
    mounts = {}
    for pattern, proxy_url in iter_proxies(proxies):
        if not enable_http and (pattern == 'http'
                                or pattern.startswith('http://')):
            continue
        if proxy_url.startswith('socks4://') \
           or proxy_url.startswith('socks5://') \
           or proxy_url.startswith('socks5h://'):
            mounts[pattern] = get_transport_for_socks_proxy(
                verify, enable_http2, local_address, proxy_url, limit, retries)
        else:
            mounts[pattern] = get_transport(verify, enable_http2,
                                            local_address, proxy_url, limit,
                                            retries)

    if not enable_http:
        mounts['http://'] = AsyncHTTPTransportNoHttp()

    transport = get_transport(verify, enable_http2, local_address, None, limit,
                              retries)
    return httpx.AsyncClient(transport=transport,
                             mounts=mounts,
                             max_redirects=max_redirects)
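
The mounts dictionary built above follows the transport-routing pattern from the linked httpx docs: each URL pattern maps to its own transport, so per-proxy connection limits apply independently. A minimal standalone sketch of the same idea, assuming a recent httpx release (the proxy URLs are placeholders):

import httpx

limits = httpx.Limits(max_connections=100, max_keepalive_connections=20)
mounts = {
    # Route plain-HTTP and HTTPS traffic through separate proxies,
    # each with its own connection pool bounded by `limits`.
    "http://": httpx.AsyncHTTPTransport(proxy="http://proxy.local:3128", limits=limits),
    "https://": httpx.AsyncHTTPTransport(proxy="http://proxy.local:3129", limits=limits),
}
client = httpx.AsyncClient(mounts=mounts)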
Example #13
async def collect_service_info() -> Iterable[Tuple[str, httpx.Response]]:
    max_connections = 2
    async with httpx.AsyncClient(
            http2=True,
            limits=httpx.Limits(max_connections=max_connections),
            timeout=10.0,
    ) as client:
        r = await client.get(
            urllib.parse.urljoin(
                BASE_URL,
                "reference_policies_actions-resources-contextkeys.html",
            ))
        parsed_html = BeautifulSoup(r.text, features="lxml")

        service_links: List[str] = []
        for link in parsed_html.body.find_all("a"):
            href = link.attrs["href"]
            if href.startswith("./list_") and href.endswith(".html"):
                service_links.append(r.url.join(href))

        # This doesn't work at the moment,
        # see https://github.com/encode/httpx/issues/1171
        #
        # service_page_responses = await asyncio.gather(
        #     *[client.get(link) for link in service_links]
        # )
        #
        # workaround
        service_page_responses = []
        for start in range(0, len(service_links), max_connections):
            service_page_responses += await asyncio.gather(*[
                client.get(link)
                for link in service_links[start:start + max_connections]
            ])
        return zip(service_links, service_page_responses)
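
The chunked gather above works around the pool hang referenced in httpx issue 1171. An equivalent approach is to cap in-flight requests with an asyncio.Semaphore sized to the pool, which avoids the manual batching; a minimal sketch under that assumption (names here are illustrative, not from the original):

import asyncio
import httpx

async def fetch_all(urls, max_connections=2):
    # Never admit more coroutines than the pool has connections,
    # so gather() cannot over-subscribe the pool.
    semaphore = asyncio.Semaphore(max_connections)

    async def fetch(client, url):
        async with semaphore:
            return await client.get(url)

    async with httpx.AsyncClient(
            limits=httpx.Limits(max_connections=max_connections),
            timeout=10.0) as client:
        return await asyncio.gather(*(fetch(client, u) for u in urls))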
Example #14
async def test_pool_timeout(server):
    limits = httpx.Limits(max_connections=1)
    timeout = httpx.Timeout(None, pool=1e-4)

    async with httpx.AsyncClient(limits=limits, timeout=timeout) as client:
        async with client.stream("GET", server.url):
            with pytest.raises(httpx.PoolTimeout):
                await client.get("http://localhost:8000/")
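
The test above exhausts a one-connection pool by holding the only connection open with a stream, so the second request raises httpx.PoolTimeout once the tiny pool timeout elapses. In application code the same condition is catchable; a minimal sketch, assuming current httpx (the URL is a placeholder):

import httpx

limits = httpx.Limits(max_connections=1)
timeout = httpx.Timeout(5.0, pool=0.1)  # give up quickly when the pool is busy

with httpx.Client(limits=limits, timeout=timeout) as client:
    try:
        r = client.get("https://example.org/")
    except httpx.PoolTimeout:
        # No connection became free within the pool timeout; back off or retry.
        r = None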
Example #15
    def get_session(self) -> httpx.AsyncClient:
        """Return client for making asynchronous requests."""
        limits = httpx.Limits(
            max_keepalive_connections=self.KEEPALIVE_CONNECTIONS,
            max_connections=self.MAX_CONNECTIONS)
        session = httpx.AsyncClient(limits=limits, timeout=self.TIMEOUT)
        return session
Example #16
def test_pool_limits_deprecated():
    limits = httpx.Limits()

    with pytest.warns(DeprecationWarning):
        httpx.Client(pool_limits=limits)

    with pytest.warns(DeprecationWarning):
        httpx.AsyncClient(pool_limits=limits)
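
The deprecation this test asserts is the rename (around httpx 0.14) of `PoolLimits` to `Limits` and of the `pool_limits=` argument to `limits=`. A one-line migration sketch:

import httpx

limits = httpx.Limits(max_connections=100, max_keepalive_connections=20)
client = httpx.Client(limits=limits)         # current keyword
# client = httpx.Client(pool_limits=limits)  # old keyword, emits DeprecationWarning
client.close()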
Example #17
    def __init__(self, url, credentials=None, *args, **kwargs):
        limits = httpx.Limits(max_keepalive_connections=MAX_CONNECTIONS // 2,
                              max_connections=MAX_CONNECTIONS)
        super().__init__(base_url=url, limits=limits, *args, **kwargs)

        self._credentials = None
        if credentials:
            name, password = credentials
            self._credentials = {'name': name, 'password': password}
Example #18
    def _real_extract(self, url):

        self.report_extraction(url)

        try:

            _timeout = httpx.Timeout(15, connect=15)
            _limits = httpx.Limits(max_keepalive_connections=None,
                                   max_connections=None)
            client = httpx.Client(
                timeout=_timeout,
                limits=_limits,
                headers=self.get_param('http_headers'),
                follow_redirects=True,
                verify=(not self._downloader.params.get('nocheckcertificate')))

            res = client.get(url)
            webpage = re.sub('[\t\n]', '', html.unescape(res.text))
            mobj = re.findall(r'og:title\" content\=\"([^\"]+)\"', webpage)
            title = mobj[0] if mobj else url.split("/")[-1]

            mobj = re.findall(r'postid-(\d+)', webpage)
            mobj2 = re.findall(
                r"shortlink\' href\=\'https://(?:xvids\.gq|gay-tubes\.cc)/\?p\=(\d+)\'",
                webpage)
            videoid = mobj[0] if mobj else mobj2[0] if mobj2 else "video_id"

            mobj = re.findall(r'contentURL\" content="([^\"]+)\"', webpage)
            real_url = mobj[0] if mobj else ""

            if not real_url:
                raise ExtractError("Can't find real URL")

            _info_video = self._get_info_video(real_url, client)

            format_video = {
                'format_id': 'http-mp4',
                'url': _info_video.get('url'),
                'filesize': _info_video.get('filesize'),
                'ext': 'mp4'
            }

            entry_video = {
                'id': videoid,
                'title': sanitize_filename(title, restricted=True),
                'formats': [format_video],
                'ext': 'mp4'
            }

        except Exception as e:
            self.to_screen(e)
            raise
        finally:
            client.close()

        return entry_video
Example #19
    def _real_extract(self, url):

        self.report_extraction(url)

        try:

            _timeout = httpx.Timeout(15, connect=15)
            _limits = httpx.Limits(max_keepalive_connections=None,
                                   max_connections=None)
            client = httpx.Client(
                timeout=_timeout,
                limits=_limits,
                headers=self.get_param('http_headers'),
                follow_redirects=True,
                verify=(not self._downloader.params.get('nocheckcertificate')))

            res = client.get(url)
            if res.status_code >= 400:
                raise ExtractorError("Page not found")
            webpage = re.sub('[\t\n]', '', html.unescape(res.text))
            mobj = re.findall(r'gallery-data" type="text/json">([^\<]+)<',
                              webpage)
            if mobj:
                _player_info = json.loads(mobj[0])
                _url = _player_info.get('videos', {}).get('mp4', {})
                _info_video = self._get_info_video(_url, client)

                format_video = {
                    'format_id': 'http-mp4',
                    'url': _info_video.get('url'),
                    'filesize': _info_video.get('filesize'),
                    'ext': 'mp4'
                }

                return {
                    'id': str(_player_info.get('id')),
                    'title': sanitize_filename(_player_info.get('title'),
                                               restricted=True),
                    'formats': [format_video],
                    'ext': 'mp4'
                }

            else:
                raise ExtractorError("No video details found")

        except ExtractorError:
            raise
        except Exception as e:
            lines = traceback.format_exception(*sys.exc_info())
            self.to_screen(f"{repr(e)}\n{'!!'.join(lines)}")
            raise ExtractorError(repr(e))
        finally:
            client.close()
Example #20
async def creat_tasks_status_ok(urls, limit, timeout):
    limits = httpx.Limits(max_keepalive_connections=limit,
                          max_connections=limit)
    async with httpx.AsyncClient(limits=limits, timeout=None) as client:
        tasks = [
            asyncio.create_task(_is_response_staus_ok(client, url, timeout))
            for url in urls
        ]
        status_ok = await asyncio.gather(*tasks)

    return status_ok
Example #21
def get_client() -> httpx.AsyncClient:
    """Returns a httpx client that can be used with get_updates()."""
    transport = httpx.AsyncHTTPTransport(
        limits=httpx.Limits(max_keepalive_connections=5),
        retries=5,
    )
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'User-Agent': 'packet-windoze',
    }
    return httpx.AsyncClient(headers=headers, timeout=30, transport=transport)
Example #22
    def start_client(self, msg=None):
        if msg:
            self.logger.info(f"{msg} client init")
        client = httpx.Client(timeout=httpx.Timeout(20, connect=60),
                              limits=httpx.Limits(max_keepalive_connections=None,
                                                  max_connections=None),
                              headers={'user-agent': self.useragent})
        client.get("https://readcomiconline.li")
        client.cookies.set("rco_quality", "hq", "readcomiconline.li")
        client.cookies.set("rco_readType", "1", "readcomiconline.li")
        time.sleep(1)
        client.get("https://readcomiconline.li")
        time.sleep(1)
        return client
Example #23
async def main():
    responses = []
    urls = get_api_urls(num=2000)
    limits = httpx.Limits(max_keepalive_connections=20, max_connections=80)  # 104
    # limits = httpx.Limits(max_keepalive_connections=10, max_connections=20)  # 104
    # limits = httpx.Limits(max_keepalive_connections=5, max_connections=10)  # 102
    s = time.perf_counter()
    async with httpx.AsyncClient(limits=limits, timeout=20.0) as client:
        responses = await fetch(client, urls)
    elapsed = time.perf_counter() - s
    print(f"elapsed time: {elapsed}")
    requests_per_second = len(urls) / elapsed
    print(f"requests/s: {requests_per_second}")
Example #24
    def _client(self):
        if self._client_storage is None:
            logger.debug('Creating a new client instance.')
            limits = httpx.Limits(max_connections=1,
                                  max_keepalive_connections=0)
            self._client_storage = httpx.Client(
                auth=self._authenticate_request,
                verify=self._root_cert_path,
                http2=True,
                timeout=10.0,
                limits=limits,
                base_url=self._base_url)

        return self._client_storage
Example #25
def launch():
    # Run the above function and store its results in a variable.
    full_file_paths = get_filepaths(".")
    parsed = []
    limits = httpx.Limits(max_keepalive_connections=5, max_connections=5)
    client = httpx.Client(verify=False, limits=limits)
    for file in full_file_paths:
        return_val = submit_to_blackd(file, client)
        if return_val:
            parsed.append(return_val)

    if len(parsed) != len(full_file_paths):
        print(
            f"Not parsed everything, {len(full_file_paths) - len(parsed)} remaining"
        )
Example #26
def test_custom_httpx_config():
    """
    Test the SchemaRegistryClient creation with custom httpx config
    """
    timeout = httpx.Timeout(10.0, connect=60.0)
    pool_limits = httpx.Limits(max_keepalive_connections=5, max_connections=10)

    client = SchemaRegistryClient(
        url="https://127.0.0.1:65534",
        timeout=timeout,
        pool_limits=pool_limits,
    )

    assert client.timeout == timeout
    assert client.pool_limits == pool_limits
Example #27
async def process_urls(headers, username, model_id, urls):
    if urls:
        operations.create_database(model_id)
        media_ids = operations.get_media_ids(model_id)
        separated_urls = separate_by_id(urls, media_ids)

        path = pathlib.Path.cwd() / username
        path.mkdir(exist_ok=True)

        # Added pool limit:
        limits = httpx.Limits(max_connections=8, max_keepalive_connections=5)
        async with httpx.AsyncClient(headers=headers, limits=limits, timeout=None) as c:
            add_cookies(c)

            aws = [asyncio.create_task(
                download(c, path, model_id, *url)) for url in separated_urls]

            photo_count = 0
            video_count = 0
            total_bytes_downloaded = 0
            data = 0

            desc = 'Progress: ({p_count} photos, {v_count} videos || {data})'

            with tqdm(desc=desc.format(p_count=photo_count, v_count=video_count, data=data), total=len(aws), colour='cyan', leave=True) as main_bar:
                for coro in asyncio.as_completed(aws):
                    try:
                        media_type, num_bytes_downloaded = await coro
                    except Exception as e:
                        print(e)
                        continue

                    total_bytes_downloaded += num_bytes_downloaded
                    data = convert_num_bytes(total_bytes_downloaded)

                    if media_type == 'photo':
                        photo_count += 1
                        main_bar.set_description(
                            desc.format(
                                p_count=photo_count, v_count=video_count, data=data), refresh=False)

                    elif media_type == 'video':
                        video_count += 1
                        main_bar.set_description(
                            desc.format(
                                p_count=photo_count, v_count=video_count, data=data), refresh=False)

                    main_bar.update()
Example #28
    async def measure_server(self, epoch):
        print("measure server")
        print(epoch.urls[0])
        max_connections = min(epoch.number_of_connections, 50)
        print("max_connections: ", max_connections)
        # max_connections = 10
        limits = httpx.Limits(max_keepalive_connections=10,
                              max_connections=max_connections)
        timeout = httpx.Timeout(30.0, connect=60.0)
        start = time.perf_counter()
        async with httpx.AsyncClient(limits=limits, timeout=timeout) as client:
            responses = await asyncio.gather(
                *[client.get(url) for url in epoch.urls])
        elapsed = time.perf_counter() - start
        print("done: ", elapsed)
        print("responses status: ", responses[0].status_code)
        return elapsed, responses
Example #29
class PortalImageDownloader(object):
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'
    }
    limits = httpx.Limits(max_keepalive_connections=20, max_connections=40)

    def __init__(self, dirname):
        self.img_dir = Path(dirname)
        self.img_dir.mkdir(parents=True, exist_ok=True)

    async def __download_img(self, client, portal):
        img_path = self.img_dir.joinpath(
            f"{portal['lng']}_{portal['lat']}.jpg")
        if img_path.exists():
            return True
        try:
            async with client.stream('GET', portal['url']) as r:
                if r.status_code == httpx.codes.OK:
                    async with aiofiles.open(str(img_path), 'wb') as f:
                        async for chunk in r.aiter_bytes():
                            await f.write(chunk)
                    return True
                else:
                    return False
        except Exception:
            await asyncio.to_thread(partial(img_path.unlink, missing_ok=True))
            return False

    async def download_from_csv(self, portals_csv):
        portals_list = await asyncio.to_thread(PortalsCSV.read_csv,
                                               portals_csv)
        async with httpx.AsyncClient(headers=self.headers,
                                     limits=self.limits) as client:
            print(f'[!] Downloading Portal images from ({portals_csv})...')
            tasks = [
                asyncio.create_task(self.__download_img(client, portal))
                for portal in portals_list
            ]
            unfinished = [
                n for n, coro in enumerate(tqdm.as_completed(tasks), 1)
                if await coro is False
            ]
            print('[!] Image download finished.')
            if unfinished:
                print(f"[!] {len(unfinished)} images failed to download; run again to fetch the remaining images.")
Example #30
async def run(count: int = 1, batch: int = 1) -> None:
    sema = asyncio.Semaphore(batch)

    async def get(s: httpx.AsyncClient) -> bool:
        async with sema:
            resp = await s.get(URL)
            return resp.status_code == 200 and resp.text == 'OK\n'

    fails = 0
    limits = httpx.Limits(max_keepalive_connections=batch,
                          max_connections=batch)
    async with httpx.AsyncClient(limits=limits) as s:
        tasks = [get(s) for _ in range(count)]
        for task in asyncio.as_completed(tasks):
            fails += not await task

    assert fails == 0