Example No. 1
def test_url_copywith_invalid_component():
    url = httpx.URL("https://example.org")
    with pytest.raises(TypeError):
        url.copy_with(pathh="/incorrect-spelling")
    with pytest.raises(TypeError):
        url.copy_with(userinfo="should be bytes")
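For contrast with the failure cases above, a minimal sketch (not from the original test suite) of copy_with() called with valid component names:

import httpx

url = httpx.URL("https://example.org")
updated = url.copy_with(path="/correct-spelling", port=8080)
assert str(updated) == "https://example.org:8080/correct-spelling"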
Example No. 2
def test_not_same_origin():
    origin1 = httpx.URL("https://example.com")
    origin2 = httpx.URL("HTTP://EXAMPLE.COM")
    assert not same_origin(origin1, origin2)
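same_origin() here is a project helper rather than public httpx API. A hypothetical equivalent, comparing the normalized scheme, host, and port, might look like:

import httpx


def same_origin(a: httpx.URL, b: httpx.URL) -> bool:
    # httpx lower-cases the scheme and host, so the comparison is case-insensitive.
    return (a.scheme, a.host, a.port) == (b.scheme, b.host, b.port)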
Example No. 3
        ("https://foo/", "foo=bar", "https://foo/", "foo=bar"),
        ("https://foo/", b"foo=bar", "https://foo/", b"foo=bar"),
        ("https://foo/", [("foo", "bar")], "https://foo/", [("foo", "bar")]),
        ("https://foo/", {
            "foo": "bar"
        }, "https://foo/", {
            "foo": "bar"
        }),
        ("https://foo/", (("foo", "bar"), ), "https://foo/",
         (("foo", "bar"), )),
        ("https://foo?foo=bar", "baz=qux", "https://foo?foo=bar", "baz=qux"),
        ("https://foo?foo=bar", "baz=qux", "https://foo?foo=bar&baz=qux",
         None),
        (re.compile(r"https://foo/(\w+)/"), "foo=bar", "https://foo/bar/",
         "foo=bar"),
        (httpx.URL("https://foo/"), "foo=bar", "https://foo/", "foo=bar"),
        (
            httpx.URL("https://foo?foo=bar"),
            "baz=qux",
            "https://foo?foo=bar&baz=qux",
            None,
        ),
    ],
)
async def test_params_match(client, url, params, call_url, call_params):
    respx.get(url, params=params) % dict(content="spam spam")
    response = await client.get(call_url, params=call_params)
    assert response.text == "spam spam"


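A minimal, self-contained sketch of the respx pattern used above, assuming respx's % operator for attaching a mocked response to a route (the URL and content are illustrative):

import httpx
import respx


@respx.mock
def fetch_mocked_example() -> str:
    # Route matching on URL and query params; the % operator attaches the response.
    respx.get("https://foo/", params={"foo": "bar"}) % dict(content="spam spam")
    return httpx.get("https://foo/", params={"foo": "bar"}).text  # "spam spam"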
Example No. 4
def __init__(self, client, dynamo_base=None):
    if dynamo_base is None:
        dynamo_base = Dynamo.defaults["dynamo_base"]
    self.client = client
    self.baseurl = httpx.URL(dynamo_base)
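Examples 4, 5, 19, 21, 22, and 30 all follow this same thin-wrapper pattern: store a client plus a base httpx.URL. A hypothetical sketch of how such a wrapper might use the stored base URL (class, default URL, and method names are illustrative):

import httpx


class Service:
    """Hypothetical wrapper mirroring the pattern above; names are illustrative."""

    defaults = {"base": "https://example.invalid/api/"}

    def __init__(self, client, base=None):
        if base is None:
            base = Service.defaults["base"]
        self.client = client
        self.baseurl = httpx.URL(base)

    def get(self, path, **params):
        # Resolve endpoint paths against the stored base URL.
        return self.client.get(self.baseurl.join(path), params=params)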
Example No. 5
def __init__(self, client, dbs_base=None):
    if dbs_base is None:
        dbs_base = DBS.defaults["dbs_base"]
    self.client = client
    self.baseurl = httpx.URL(dbs_base)
Example No. 6
    def _real_extract(self, url):
        self.report_extraction(url)
        driver = self.get_driver()

        try:
            with GayStreamIE._LOCK:
                driver.get(url)

            el_over = self.wait_until(
                driver, 60,
                ec.presence_of_element_located((By.CSS_SELECTOR, "a.boner")))
            if el_over:
                el_over.click()

            el_ifr = self.wait_until(
                driver, 60, ec.presence_of_element_located((By.ID, "ifr")))
            _entry_video = {}

            if el_ifr:
                url_ifr = el_ifr.get_attribute("src")
                _url_ifr = httpx.URL(url_ifr)
                url_post = url_ifr.replace('/v/', '/api/source/')
                data_post = {'r': "https://gaystream.pw/", 'd': _url_ifr.host}
                headers_post = {
                    'Referer': url_ifr,
                    'Origin': f'{_url_ifr.scheme}://{_url_ifr.host}'
                }
                self.wait_until(driver, randint(3, 5))
                info = self.get_info_video(url, url_post, data_post,
                                           headers_post, driver)
                self.to_screen(f'{url}:{url_post}\n{info}')
                _formats = []
                if info:
                    for vid in info.get('data'):
                        _url = vid.get('file')
                        _info_video = self.get_info_for_format(_url)
                        if not _info_video:
                            raise ExtractorError(f"[{_url}] no video info")
                        _formats.append({
                            'format_id': vid.get('label'),
                            'url': _info_video.get('url'),
                            'resolution': vid.get('label'),
                            'height': int_or_none(vid.get('label')[:-1]),
                            'filesize': _info_video.get('filesize'),
                            'ext': 'mp4',
                        })

                    if _formats:
                        self._sort_formats(_formats)
                    _videoid = self._match_id(url)
                    _title = driver.title.replace("Watch", "").replace(
                        "on Gaystream.pw", "").strip()

                    _entry_video = {
                        'id': _videoid,
                        'title': sanitize_filename(_title, restricted=True),
                        'formats': _formats,
                        'ext': 'mp4'
                    }

                    self.to_screen(f'{url}\n{_entry_video}')

                    if not _entry_video:
                        raise ExtractorError("no video info")
                    return _entry_video

        except ExtractorError as e:
            raise
        except Exception as e:
            lines = traceback.format_exception(*sys.exc_info())
            self.to_screen(f"{repr(e)} {str(e)} \n{'!!'.join(lines)}")
            raise ExtractorError(str(e))
        finally:
            try:
                self.rm_driver(driver)
            except Exception:
                pass
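A minimal sketch of just the httpx.URL usage in the extractor above, deriving Origin-style values and a rewritten API endpoint from a URL (the iframe URL is illustrative):

import httpx

iframe_url = httpx.URL("https://player.example.invalid/v/abc123")
origin = f"{iframe_url.scheme}://{iframe_url.host}"       # "https://player.example.invalid"
api_url = str(iframe_url).replace("/v/", "/api/source/")  # ".../api/source/abc123"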
Example No. 7
def test_should_not_be_proxied(url, no_proxy, expected):
    os.environ.update(no_proxy)
    parsed_url = httpx.URL(url)
    assert should_not_be_proxied(parsed_url) == expected
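should_not_be_proxied() is project code, not an httpx API. A hypothetical sketch of such a check against the NO_PROXY environment variable:

import os

import httpx


def should_not_be_proxied(url: httpx.URL) -> bool:
    # Treat a host as "not proxied" if it matches an entry in NO_PROXY.
    no_proxy = os.environ.get("NO_PROXY", os.environ.get("no_proxy", ""))
    hosts = [h.strip() for h in no_proxy.split(",") if h.strip()]
    return any(url.host == h or url.host.endswith("." + h) for h in hosts)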
Example No. 8
def test_ipv6_url():
    url = httpx.URL("http://[::ffff:192.168.0.1]:5678/")

    assert url.host == "::ffff:192.168.0.1"
    assert url.netloc == "[::ffff:192.168.0.1]:5678"
Example No. 9
def test_ipv6_url_copy_with_host(url_str, new_host):
    url = httpx.URL(url_str).copy_with(host=new_host)

    assert url.host == "::ffff:192.168.0.1"
    assert url.netloc == "[::ffff:192.168.0.1]:1234"
    assert str(url) == "http://[::ffff:192.168.0.1]:1234"
Example No. 10
def test_url_invalid_type():
    class ExternalURLClass:  # representing external URL class
        pass

    with pytest.raises(TypeError):
        httpx.URL(ExternalURLClass())  # type: ignore
Example No. 11
def test_url_with_url_encoded_path():
    url = httpx.URL("https://www.example.com/path%20to%20somewhere")
    assert url.path == "/path to somewhere"
    assert url.query == b""
    assert url.raw_path == b"/path%20to%20somewhere"
Example No. 12
def test_url_invalid():
    with pytest.raises(httpx.InvalidURL):
        httpx.URL("https://😇/")
Example No. 13
def test_url_copywith_query():
    url = httpx.URL("https://example.org")
    url = url.copy_with(query=b"a=123")
    assert url.path == "/"
    assert url.query == b"a=123"
    assert url.raw_path == b"/?a=123"
Example No. 14
def test_url_copywith_urlencoded_path():
    url = httpx.URL("https://example.org")
    url = url.copy_with(path="/path to somewhere")
    assert url.path == "/path to somewhere"
    assert url.query == b""
    assert url.raw_path == b"/path%20to%20somewhere"
Example No. 15
def test_merge_relative_url_with_dotted_path():
    client = httpx.Client(base_url="https://www.example.com/some/path")
    request = client.build_request("GET", "../testing/123")
    assert request.url == httpx.URL("https://www.example.com/some/testing/123")
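The same dotted-path resolution is available directly via httpx.URL.join(); note that the client treats the base path as a directory (trailing slash), which is why the "some" segment is preserved:

import httpx

base = httpx.URL("https://www.example.com/some/path/")
assert str(base.join("../testing/123")) == "https://www.example.com/some/testing/123"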
Example No. 16
def test_idna_url(given, idna, host, scheme, port):
    url = httpx.URL(given)
    assert url == httpx.URL(idna)
    assert url.host == host
    assert url.scheme == scheme
    assert url.port == port
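The parametrized values are not shown above; one concrete instance of the equality the test checks (chosen here for illustration) is that a non-ASCII host compares equal to its IDNA-encoded form:

import httpx

assert httpx.URL("https://中国.icom.museum/") == httpx.URL("https://xn--fiqs8s.icom.museum/")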
Example No. 17
def url(self) -> httpx.URL:
    """The relative URL for sending the request as an HTTP GET."""
    return httpx.URL(self.endpoint, params=self.params)
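As above, httpx.URL() accepts a params argument that is encoded into the query string; a minimal sketch with an illustrative URL:

import httpx

url = httpx.URL("https://example.invalid/search", params={"q": "httpx"})
assert str(url) == "https://example.invalid/search?q=httpx"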
Example No. 18
def test_url_eq_str():
    url = httpx.URL("https://example.org:123/path/to/somewhere?abc=123#anchor")
    assert url == "https://example.org:123/path/to/somewhere?abc=123#anchor"
    assert str(url) == url
Example No. 19
def __init__(self, client, reqmgr_base=None):
    if reqmgr_base is None:
        reqmgr_base = ReqMgr.defaults["reqmgr_base"]
    self.client = client
    self.baseurl = httpx.URL(reqmgr_base)
Example No. 20
def join_url(url: str, ref: str = '.') -> str:
    return str(httpx.URL(url).join(ref))
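Illustrative usage of join_url(): the ref is resolved against the base URL per RFC 3986, and the default ref of "." resolves to the base's directory (URLs here are examples, not from the original project):

assert join_url("https://example.org/docs/", "guide.html") == "https://example.org/docs/guide.html"
assert join_url("https://example.org/docs/page.html") == "https://example.org/docs/"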
Example No. 21
def __init__(self, client, mcm_base=None):
    if mcm_base is None:
        mcm_base = McM.defaults["mcm_base"]
    self.client = client
    self.baseurl = httpx.URL(mcm_base)
Example No. 22
def __init__(self, client, unified_base=None):
    if unified_base is None:
        unified_base = Unified.defaults["unified_base"]
    self.client = client
    self.baseurl = httpx.URL(unified_base)
Example No. 23
def to_production_url(url: str) -> str:
    urlobj = httpx.URL(url)
    return str(
        urlobj.copy_with(scheme="https", host="florimond.dev", port=None))
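Illustrative usage of to_production_url(), rewriting a local development URL to the production origin (the input URL is an assumption):

assert to_production_url("http://localhost:8000/blog/") == "https://florimond.dev/blog/"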
Example No. 24
async def feed(request: Request) -> Response:
    # If this client has requested this page from us before and we gave them
    # our special ETag for an archive page, then we can immediately conclude
    # that whatever response we gave them before is fine.
    if any(
        ARCHIVE_ETAG in get_list(v) for v in request.headers.getlist("If-None-Match")
    ):
        # XXX: https://tools.ietf.org/html/rfc7232#section-4.1 says "The server
        # generating a 304 response MUST generate any of the following header
        # fields that would have been sent in a 200 (OK) response to the same
        # request: Cache-Control, Content-Location, Date, ETag, Expires, and
        # Vary," but we don't have most of those available without forwarding
        # the request upstream. We're acting sort of like a cache but because
        # we're stateless we don't have a stored response to use to satisfy
        # this requirement. However we can at least tell the client which of
        # the ETags it asked about is the right one.
        return Response(status_code=304, headers={"ETag": ARCHIVE_ETAG})

    # Allow clients to either URL-encode query parameters in the path or append
    # them unquoted.
    url = httpx.URL(request.path_params["url"], request.query_params)

    expected_hash = next((v for k, v in parse_qsl(url.query) if k == b"modified"), None)

    # No matter what, make sure the feed is sorted by modification time. Even
    # the current feed must change whenever any post is modified.
    url = update_query(url, modified=None, orderby="modified")

    # Forward a limited set of headers from the client request.
    # Enumerating a complete block-list is too hard, so block anything
    # that isn't specifically allowed.
    request_headers = httpx.Headers()
    for header in FORWARD_REQUEST_HEADERS:
        value = request.headers.get(header)
        if value is not None:
            request_headers[header] = value

    async with http_client.stream("GET", url, headers=request_headers) as doc:
        response_headers = doc.headers.copy()

        # Remove the standard hop-by-hop headers, the `Connection`
        # header and any hop-by-hop headers it names, and anything else
        # that would interfere with our transformations.
        remove_headers(
            response_headers,
            *CONNECTION_HEADERS.union(
                header.lower()
                for header in get_list(response_headers.get("connection"))
            ),
        )

        assert doc.url is not None
        url = doc.url
        if "content-location" not in response_headers:
            response_headers["content-location"] = str(url)

        if doc.status_code != 200:
            return Response(
                status_code=doc.status_code, headers=dict(response_headers.items())
            )

        if "https://api.w.org/" not in doc.links:
            raise HTTPException(403, "Not a WordPress site")

        try:
            raw_content_type = response_headers["content-type"]
            content_type = raw_content_type.split(";", 1)[0].strip()
        except KeyError:
            raise HTTPException(502, "Origin didn't provide a Content-Type")

        try:
            contents_hash = sha256()
            parser = XMLParser()
            async for chunk in doc.aiter_bytes():
                contents_hash.update(chunk)
                parser.feed(chunk)
            root = parser.close()
        except ElementTree.ParseError:
            raise HTTPException(406, "Unsupported non-XML feed format")

    actual_hash = urlsafe_b64encode(contents_hash.digest())
    if expected_hash is not None and expected_hash != actual_hash:
        raise HTTPException(410, "Page contents changed")

    feed_element = None
    if root.tag == "{" + NAMESPACES["atom"] + "}feed":
        feed_element = root
    elif root.tag == "rss":
        feed_element = root.find("./channel", NAMESPACES)

    if feed_element is None:
        raise HTTPException(406, f"Unsupported feed format {root.tag}")

    if any(feed_element.find(tag, NAMESPACES) for tag in HISTORY_TAGS):
        raise HTTPException(403, "Already an RFC5005 feed, no proxy needed")

    new_elements = []
    query = dict(parse_qsl(url.query))

    # If the client provided an Authorization header or something then we
    # should forward that for all sub-requests too. But we can't allow "not
    # modified" or "partial content" responses: we need the entire thing.
    remove_headers(request_headers, "if-none-match", "if-modified-since")

    if query.get(b"order") == b"ASC":
        # This is a legit archive page. We just need to construct an
        # appropriate URL for the next-oldest archive page and insert it.
        try:
            page = int(query.get(b"paged", 1))
        except ValueError:
            raise HTTPException(400, "Invalid 'paged' parameter")

        new_elements.append(element("fh", "archive"))

        current_url = update_query(url, modified=None, order=None, paged=None)
        new_elements.append(
            element(
                "atom",
                "link",
                href=str(current_url),
                rel="current",
                type=content_type,
            )
        )

        if page > 1:
            prev_url = await hash_page(
                url, request_headers, actual_hash, raw_content_type, page - 1
            )
            new_elements.append(
                element(
                    "atom",
                    "link",
                    href=str(prev_url),
                    rel="prev-archive",
                    type=content_type,
                )
            )

        # Archive pages are indefinitely cachable because we'll point the
        # client to a different URL if the contents should change.
        response_headers["etag"] = ARCHIVE_ETAG
        for header in ("last-modified", "expires"):
            try:
                del response_headers[header]
            except KeyError:
                pass

        # This feed might still not be okay for a shared cache to store though;
        # check for any existing Cache-Control directives.
        cc = {
            directive
            for directive in get_list(response_headers.get("cache-control"))
            if not directive.startswith("s-max-age=")
            if not directive.startswith("max-age=")
        }
        if "private" not in cc:
            cc.add("public")

        # Cache this response for up to a year and don't revalidate it.
        cc.discard("no-store")
        cc.add('max-age="31536000"')
        cc.add("immutable")

        response_headers["cache-control"] = ", ".join(cc)

    elif b"order" in query or b"paged" in query:
        # Refuse to process non-archive feeds that don't have the newest entries.
        raise HTTPException(403)

    else:
        # We need to treat this as the main feed document, which means now we
        # need to know how many pages WordPress is going to break this feed
        # into so we can link to the last of them.
        url = update_query(url, order="ASC")
        last_page = await exponential_search(
            partial(page_exists, url, request_headers, raw_content_type)
        )

        if last_page == 1:
            # This is a complete feed, no pagination needed.
            new_elements.append(element("fh", "complete"))
        else:
            # The current document has all the posts of the final page, and
            # possibly a few extra if the number of posts per page doesn't
            # evenly divide into the total number of posts. Either way we
            # should link to the page before the final one.
            prev_url = await hash_page(
                url, request_headers, actual_hash, raw_content_type, last_page - 1
            )
            new_elements.append(
                element(
                    "atom",
                    "link",
                    rel="prev-archive",
                    type=content_type,
                    href=str(prev_url),
                )
            )

    for e in reversed(new_elements):
        feed_element.insert(0, e)

    return Response(
        ElementTree.tostring(root),
        media_type=content_type,
        headers=dict(response_headers.items()),
    )
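Helpers such as update_query() and get_list() are defined elsewhere in the project and are not shown here. A plausible sketch of update_query(), consistent with how it is called above (setting query parameters on an httpx.URL, with None removing a key), might look like:

from urllib.parse import parse_qsl, urlencode

import httpx


def update_query(url: httpx.URL, **params) -> httpx.URL:
    query = dict(parse_qsl(url.query.decode()))
    for key, value in params.items():
        if value is None:
            query.pop(key, None)  # e.g. modified=None strips the parameter
        else:
            query[key] = str(value)
    return url.copy_with(query=urlencode(query).encode())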
Example No. 25
import logging

import httpx
import pytest

from collector import IHSClient
from tests.utils import MockAsyncDispatch

logger = logging.getLogger(__name__)

pytestmark = pytest.mark.asyncio

base_url = httpx.URL("http://127.0.0.1")


@pytest.fixture
def well_dispatcher():
    yield MockAsyncDispatch({
        "data": [
            {
                "a": 1,
                "b": 2,
                "c": 3
            },
            {
                "a": 3,
                "b": 4,
                "c": 5
            },
            {
                "a": 5,
Example No. 26
def test_merge_absolute_url():
    client = httpx.Client(base_url="https://www.example.com/")
    request = client.build_request("GET", "http://www.example.com/")
    assert request.url == httpx.URL("http://www.example.com/")
    with pytest.warns(DeprecationWarning):
        assert not request.url.is_ssl
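The warning above reflects that URL.is_ssl is deprecated in recent httpx releases; the non-deprecated equivalent of that assertion is a plain scheme check:

import httpx

assert httpx.URL("http://www.example.com/").scheme != "https"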
Example No. 27
def test_url_matches(pattern, url, expected):
    pattern = URLPattern(pattern)
    assert pattern.matches(httpx.URL(url)) == expected
Example No. 28
def test_merge_relative_url():
    client = httpx.Client(base_url="https://www.example.com/")
    request = client.build_request("GET", "/testing/123")
    assert request.url == httpx.URL("https://www.example.com/testing/123")
Example No. 29
async def _async_request_hook(span: "Span", request: "RequestInfo"):
    url = httpx.URL(request[1])
    span.update_name("GET " + str(url))
Example No. 30
def __init__(self, client, msmgr_base=None):
    if msmgr_base is None:
        msmgr_base = MSMgr.defaults["msmgr_base"]
    self.client = client
    self.baseurl = httpx.URL(msmgr_base)