Example #1
File: models.py Project: jocke-l/respx
def decode_request(request: Request) -> httpx.Request:
    """
    Build an httpx Request from httpcore request args.
    """
    method, url, headers, stream = request
    return httpx.Request(method, url, headers=headers, stream=stream)
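A hedged usage sketch (not from the respx source; the URL, headers, and body are hypothetical): the decoded object is an ordinary httpx.Request carrying the same method, URL, headers, and body as the raw httpcore-style tuple. Note that recent httpx versions take a raw body via content= rather than stream=.

import httpx

# Hypothetical stand-in for a decoded request.
request = httpx.Request(
    "POST",
    "https://example.org/items",
    headers={"content-type": "application/json"},
    content=b'{"name": "widget"}',
)
print(request.method, request.url)  # POST https://example.org/items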
Example #2
def null_request(method="GET", url="http://www.example.org/", **kwargs):
    return httpx.Request(method, url, **kwargs)
Example #3
def build_request(self, method, url):
    headers = http.DEFAULT_CLIENT_OPTIONS["headers"].copy()
    headers["Authorization"] = f"token {self._token}"
    return httpx.Request(method, url, headers=headers)
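A self-contained sketch of the same pattern (the GitHubAPI class, token, and URL are made up for illustration): a prepared httpx.Request is typically dispatched later with Client.send().

import httpx

class GitHubAPI:
    def __init__(self, token: str):
        self._token = token

    def build_request(self, method: str, url: str) -> httpx.Request:
        # Same idea as the example above: attach the auth header up front.
        headers = {"Authorization": f"token {self._token}"}
        return httpx.Request(method, url, headers=headers)

api = GitHubAPI("s3cr3t")
request = api.build_request("GET", "https://api.github.com/user")
with httpx.Client() as client:
    response = client.send(request)  # dispatch the prepared request as-is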
Example #4
def test_data_pattern(lookup, data, expected):
    request = httpx.Request("POST", "https://foo.bar/", data=data)
    match = Data(data, lookup=lookup).match(request)
    assert bool(match) is expected
Example #5
def test_method_pattern(lookup, value, expected):
    request = httpx.Request("GET", "https://foo.bar/")
    assert bool(Method(value, lookup=lookup).match(request)) is expected
Example #6
def test_port_pattern(lookup, port, url, expected):
    request = httpx.Request("GET", url)
    assert bool(Port(port, lookup=lookup).match(request)) is expected
Example #7
def test_url_pattern(lookup, value, context, url, expected):
    request = httpx.Request("GET", url)
    match = URL(value, lookup=lookup).match(request)
    assert bool(match) is expected
    assert match.context == context
Example #8
def test_content_length_header():
    request = httpx.Request("POST", "http://example.org", data=b"test 123")
    assert request.headers["Content-Length"] == "8"
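A hedged companion sketch: the Content-Length header simply tracks the length of the encoded body, whatever bytes are supplied.

import httpx

request = httpx.Request("POST", "http://example.org", content=b"hello world")
assert request.headers["Content-Length"] == str(len(b"hello world"))  # "11"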
Example #9
def test_json_encoded_data():
    request = httpx.Request("POST", "http://example.org", json={"test": 123})
    request.read()

    assert request.headers["Content-Type"] == "application/json"
    assert request.content == b'{"test": 123}'
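A hedged companion sketch: json= serialises the payload and sets the JSON content type, whereas content= sends the given bytes untouched, so the two requests below carry identical bodies but different headers.

import httpx

json_request = httpx.Request("POST", "http://example.org", json={"test": 123})
raw_request = httpx.Request("POST", "http://example.org", content=b'{"test": 123}')
json_request.read()
raw_request.read()

# Byte-identical bodies, but only json= sets a content type.
assert json_request.content == raw_request.content
assert json_request.headers["Content-Type"] == "application/json"
assert "Content-Type" not in raw_request.headers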
Example #10
async def test_deprecated_apis():
    with respx.mock:
        url = "https://foo.bar/"

        # Response kwargs among request kwargs
        with warnings.catch_warnings(record=True) as w:
            respx.get(url, status_code=201)
            respx.get(url, headers={})
            respx.get(url, content_type="foo/bar")
            respx.get(url, content="")
            respx.get(url, text="")
            respx.get(url, html="")
            respx.get(url, json={})
            respx.get(url, pass_through=True)
            assert len(w) == 8

        # Add route by http method string
        with warnings.catch_warnings(record=True) as w:
            respx.add("GET", url)
            assert len(w) == 1

        # Alias and aliases
        with warnings.catch_warnings(record=True) as w:
            request_pattern = respx.get(url, alias="index")
            name = request_pattern.alias
            aliased_pattern = respx.mock.aliases["index"]
            assert aliased_pattern is request_pattern
            assert name == request_pattern.name
            assert len(w) == 3

        # RequestPattern
        with warnings.catch_warnings(record=True) as w:
            callback = lambda req, res: res  # pragma: nocover
            request_pattern = RequestPattern(callback)
            assert request_pattern.has_side_effect

            request_pattern = RequestPattern("GET",
                                             "https://foo.bar/",
                                             pass_through=True)
            assert request_pattern.is_pass_through
            assert len(w) == 2

        # ResponseTemplate
        with warnings.catch_warnings(record=True) as w:
            request = httpx.Request("GET", "https://foo.bar/")

            callback = lambda request: ResponseTemplate(201)
            request_pattern = RequestPattern(callback,
                                             response=ResponseTemplate(444))
            assert request_pattern.resolve(request).status_code == 201

            request_pattern = RequestPattern("GET",
                                             response=ResponseTemplate(444))
            assert request_pattern.resolve(request).status_code == 444

            assert len(w) == 5

        # Mixing callback and response details
        with pytest.raises(NotImplementedError):
            callback = lambda request: ResponseTemplate(201)  # pragma: nocover
            respx.Router().add(callback, status_code=201)

        # Async callback
        with pytest.raises(NotImplementedError):

            async def callback(request):
                return None  # pragma: nocover

            mock_response = MockResponse(content=callback)
            request = httpx.Request("GET", "http://foo.bar/")
            mock_response.as_response(request)
Example #11
def test_no_content():
    request = httpx.Request("GET", "http://example.org")
    assert "Content-Length" not in request.headers
Example #12
def test_request_user_agent_pipeline():
    pl = pls.RequestUserAgentPipeline(user_agent="ant")
    req = httpx.Request("GET", "https://www.hi.com")
    assert pl.process(req) is req
    assert req.headers["User-Agent"] == "ant"
Example #13
def test_request_duplicate_filter_pipeline():
    pl = pls.RequestDuplicateFilterPipeline()
    req = httpx.Request("GET", "http://test.com")
    assert pl.process(req) is req
    with pytest.raises(Dropped):
        pl.process(req)
Example #14
async def test_pipeline():
    pl = pls.Pipeline()
    pl.process(httpx.Request("GET", "https://test.com"))
Example #15
def test_scheme_pattern(lookup, scheme, expected):
    request = httpx.Request("GET", "https://foo.bar/")
    assert bool(Scheme(scheme, lookup=lookup).match(request)) is expected
Example #16
def test_request_repr():
    request = httpx.Request("GET", "http://example.org")
    assert repr(request) == "<Request('GET', 'http://example.org')>"
Example #17
def test_host_pattern(lookup, host, expected):
    request = httpx.Request("GET", "https://foo.bar/")
    assert bool(Host(host, lookup=lookup).match(request)) is expected
Example #18
async def test_cannot_access_content_without_read():
    # Accessing request.content before an explicit read() should raise
    # RequestNotRead, so a streaming body is never consumed implicitly.
    request = httpx.Request("POST", "http://example.org", json={"test": 123})
    with pytest.raises(httpx.RequestNotRead):
        request.content
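A hedged follow-up, assuming the httpx version these tests target (where JSON bodies are encoded lazily): after an explicit read() the body is cached and .content becomes accessible.

import httpx

request = httpx.Request("POST", "http://example.org", json={"test": 123})
request.read()                               # encode and cache the body
assert request.content == b'{"test": 123}'  # no longer raises RequestNotRead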
Example #19
def test_params_pattern(lookup, params, url, expected):
    request = httpx.Request("GET", url)
    assert bool(Params(params, lookup=lookup).match(request)) is expected
Example #20
def test_override_host_header():
    headers = {"host": "1.2.3.4:80"}

    request = httpx.Request("GET", "http://example.org", headers=headers)
    assert request.headers["Host"] == "1.2.3.4:80"
Example #21
def test_content_pattern(lookup, content, expected):
    request = httpx.Request("POST", "https://foo.bar/", content=b"foobar")
    match = Content(content, lookup=lookup).match(request)
    assert bool(match) is expected
Example #22
def test_override_accept_encoding_header():
    headers = {"Accept-Encoding": "identity"}

    request = httpx.Request("GET", "http://example.org", headers=headers)
    assert request.headers["Accept-Encoding"] == "identity"
Example #23
def test_json_pattern(lookup, value, json, expected):
    request = httpx.Request("POST", "https://foo.bar/", json=json)
    match = JSON(value, lookup=lookup).match(request)
    assert bool(match) is expected
Example #24
def build_request(self, method: str, url: str) -> httpx.Request:
    headers = http.DEFAULT_HEADERS.copy()
    headers["Authorization"] = f"token {self._token}"
    return httpx.Request(method, url, headers=headers)
Example #25
def test_headers_pattern(lookup, headers, request_headers, expected):
    request = httpx.Request("GET",
                            "http://foo.bar/",
                            headers=request_headers,
                            json={"foo": "bar"})
    assert bool(Headers(headers, lookup=lookup).match(request)) is expected
Example #26
import zlib

import brotli
import pytest

import httpx
from httpx.content_streams import AsyncIteratorStream
from httpx.decoders import (
    BrotliDecoder,
    DeflateDecoder,
    GZipDecoder,
    IdentityDecoder,
    LineDecoder,
    TextDecoder,
)

REQUEST = httpx.Request("GET", "https://example.org")


def test_deflate():
    body = b"test 123"
    compressor = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS)
    compressed_body = compressor.compress(body) + compressor.flush()

    headers = [(b"Content-Encoding", b"deflate")]
    response = httpx.Response(200,
                              headers=headers,
                              content=compressed_body,
                              request=REQUEST)
    assert response.content == body
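A companion sketch for the gzip path, mirroring test_deflate and reusing the module-level REQUEST above (same caveat: the Response signature with request= matches the httpx version these tests target).

import gzip

def test_gzip():
    body = b"test 123"
    compressed_body = gzip.compress(body)

    headers = [(b"Content-Encoding", b"gzip")]
    response = httpx.Response(200,
                              headers=headers,
                              content=compressed_body,
                              request=REQUEST)
    assert response.content == body  # decoded transparently on access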

Example #27
class TestStaticSpider:
    """Tests class StaticSpider"""
    async def test_specific_static_attributes_are_correctly_instantiated(self):
        config = Configuration(user_agent='mozilla/5.0')
        spider = StaticSpider(urls=['http://foo.com'],
                              parse=lambda x, y: None,
                              config=config)

        assert config == spider._config
        assert isinstance(spider._lock, anyio.Lock)
        assert isinstance(spider._queue, Queue)
        assert isinstance(spider._start_time, float)
        assert isinstance(spider._http_client, httpx.AsyncClient)
        assert isinstance(spider._robots_analyser, RobotsAnalyzer)

    # _fetch tests

    @respx.mock
    async def test_fetch_method_returns_httpx_response(self, anyio_spider):
        url = 'http://foo.com'
        respx.get('http://foo.com') % {'text': 'content'}
        response = await anyio_spider._fetch(url)

        assert 'content' == response.text
        assert 200 == response.status_code
        assert url == f'{response.url}'

    @respx.mock
    async def test_middlewares_are_applied_when_fetching_resources(
            self, capsys):
        def log_middleware(fetch):
            async def wrapper(*args, **kwargs):
                print('before fetching')
                result = await fetch(*args, **kwargs)
                print('after fetching')
                return result

            return wrapper

        url = 'http://foo.com'
        respx.get(url)
        config = Configuration(response_middlewares=[log_middleware])
        spider = StaticSpider(urls=[url],
                              parse=lambda x, y: None,
                              config=config)
        response = await spider._fetch(url)

        assert 200 == response.status_code
        out, _ = capsys.readouterr()
        assert 'before fetching' in out
        assert 'after fetching' in out

    # _get_static_response test

    @pytest.mark.parametrize(
        ('url', 'text', 'httpx_response'),
        [
            ('file:///home/kevin/page.html', 'hello world', None),
            ('', '',
             httpx.Response(200,
                            request=httpx.Request('GET', 'http://foo.com'))),
        ],
    )
    async def test_should_return_static_response_when_giving_correct_input(
            self, url, text, httpx_response, anyio_spider):
        static_response = anyio_spider._get_static_response(
            url, text, httpx_response)

        assert isinstance(static_response, StaticResponse)
        assert static_response._reachable_urls is anyio_spider.reachable_urls
        assert static_response._followed_urls is anyio_spider.followed_urls
        assert static_response._queue is anyio_spider._queue
        assert url == static_response._url
        assert text == static_response._text
        assert static_response._httpx_response is httpx_response

    # _handle_url tests

    @pytest.mark.parametrize(
        ('reachable_urls', 'unreachable_urls', 'robots_excluded_urls'),
        [(set(), set(), {'http://foo.com'}),
         (set(), {'http://foo.com'}, set()),
         ({'http://foo.com'}, set(), set())],
    )
    async def test_should_do_nothing_if_url_is_already_present_in_one_url_set(
            self, mocker, anyio_spider, reachable_urls, unreachable_urls,
            robots_excluded_urls):
        url = 'http://foo.com'
        logger_mock = mocker.patch('logging.Logger.debug')
        anyio_spider.reachable_urls = reachable_urls
        anyio_spider.unreachable_urls = unreachable_urls
        anyio_spider.robots_excluded_urls = robots_excluded_urls
        await anyio_spider._handle_url(url)

        logger_mock.assert_any_call('url %s has already been processed', url)

    async def test_should_read_file_content_when_giving_a_file_url(
            self, tmp_path):
        parse_args = []
        hello_file = tmp_path / 'hello.txt'
        hello_file.write_text('hello world')
        file_url = hello_file.resolve().as_uri()

        async def parse(spider, response):
            parse_args.extend([spider, response])

        static_spider = StaticSpider(urls=[file_url], parse=parse)
        await static_spider._handle_url(file_url)

        assert parse_args[0] is static_spider
        static_response = parse_args[1]
        assert isinstance(static_response, StaticResponse)
        assert file_url == static_response._url
        assert 'hello world' == static_response._text
        assert static_response._httpx_response is None
        assert {file_url} == static_spider.reachable_urls

    async def test_should_not_called_parse_method_when_file_cannot_be_opened(
            self, tmp_path, mocker):
        logger_mock = mocker.patch('logging.Logger.exception')
        hello_file = tmp_path / 'hello.txt'
        file_url = hello_file.resolve().as_uri()
        parse_args = []

        async def parse(spider, response):
            parse_args.extend([spider, response])

        static_spider = StaticSpider(urls=[file_url], parse=parse)
        await static_spider._handle_url(file_url)

        assert [] == parse_args
        logger_mock.assert_any_call('unable to open file %s', file_url)
        assert {file_url} == static_spider.unreachable_urls

    @respx.mock
    @pytest.mark.parametrize('status_code', [404, 500])
    async def test_should_not_called_parse_method_if_httpx_response_is_an_error_one(
            self, mocker, status_code):
        parse_args = []
        url = 'http://foo.com'

        def parse(spider, response):
            parse_args.extend([spider, response])

        respx.get(url) % status_code
        logger_mock = mocker.patch('logging.Logger.info')
        static_spider = StaticSpider(urls=[url], parse=parse)
        await static_spider._handle_url(url)

        assert [] == parse_args
        logger_mock.assert_any_call(
            'fetching url %s returns an error with status code %s', url,
            status_code)

    # For an unknown reason, the asyncio timer on Windows does not work correctly,
    # which makes static_spider._total_fetch_time equal to 0.0 and the test fail.
    # This is why the test is only run on the trio backend.
    @respx.mock
    @pytest.mark.parametrize('anyio_backend', ['trio'])
    async def test_should_fetch_content_when_giving_http_url(
            self, anyio_backend):
        parse_args = []
        url = 'http://foo.com'

        async def parse(spider, response):
            parse_args.extend([spider, response])

        respx.get(url) % {'text': 'http content'}
        static_spider = StaticSpider(urls=[url], parse=parse)
        await static_spider._handle_url(url)

        assert parse_args[0] is static_spider
        static_response = parse_args[1]
        assert isinstance(static_response, StaticResponse)
        assert '' == static_response._url
        assert '' == static_response._text
        assert 200 == static_response._httpx_response.status_code
        assert 'http content' == static_response._httpx_response.text
        assert 1 == static_spider.request_counter
        assert static_spider._total_fetch_time > 0

    @respx.mock
    async def test_should_raise_errors_if_parse_function_raises_error_and_ignore_errors_is_false(
            self):
        async def parse(*_):
            raise ValueError('just a test')

        url = 'http://foo.com'
        respx.get(url)
        static_spider = StaticSpider(urls=[url],
                                     parse=parse,
                                     ignore_errors=False)

        with pytest.raises(ValueError) as exc_info:
            await static_spider._handle_url(url)

        assert 'just a test' == str(exc_info.value)

    @respx.mock
    async def test_should_not_raise_error_if_parse_function_raises_error_and_ignore_errors_is_true(
            self):
        async def parse(*_):
            raise ValueError('just a test')

        url = 'http://foo.com'
        respx.get(url)
        static_spider = StaticSpider(urls=[url],
                                     parse=parse,
                                     ignore_errors=True)

        try:
            await static_spider._handle_url(url)
        except ValueError:
            pytest.fail('ValueError was raised and it should not happen')

    # save_item tests

    async def test_should_call_item_processors_and_reject_item_if_one_processor_returns_none(
            self, capsys, mocker):
        logger_mock = mocker.patch('logging.Logger.debug')
        data = {'banana': True}

        def processor_1(item):
            print("I'm a processor")
            return item

        async def processor_2(item):
            await anyio.sleep(0)
            if 'banana' in item:
                return
            return item

        config = Configuration(item_processors=[processor_1, processor_2])
        static_spider = StaticSpider(urls=['http://foo.com'],
                                     parse=lambda x, y: None,
                                     config=config)
        await static_spider.save_item(data)

        logger_mock.assert_any_call('item %s was rejected', data)
        out, _ = capsys.readouterr()
        assert "I'm a processor" in out

    async def test_should_save_content_to_backup_file(self, tmp_path, capsys):
        def processor(item):
            print("I'm a processor")
            return item

        backup = tmp_path / 'backup.mp'
        fruit_1 = {'fruit': 'pineapple'}
        fruit_2 = {'fruit': 'orange'}
        config = Configuration(backup_filename=f'{backup.resolve()}',
                               item_processors=[processor])
        static_spider = StaticSpider(urls=['https://foo.com'],
                                     parse=lambda x, y: None,
                                     config=config)
        await static_spider.save_item(fruit_1)
        await static_spider.save_item(fruit_2)
        out, _ = capsys.readouterr()

        assert [fruit_1, fruit_2
                ] == [item async for item in read_mp(f'{backup.resolve()}')]
        assert "I'm a processor" in out

    # _get_request_delay tests

    @respx.mock
    @pytest.mark.parametrize(('robots_content', 'value'),
                             [('Disallow: /', -1), ('Crawl-delay: 2', 2)])
    async def test_should_return_robots_txt_value_when_follow_robots_txt_is_true(
            self, robots_content, value):
        url = 'http://foo.com'
        respx.get(f'{url}/robots.txt') % {
            'text': f'User-agent:*\n{robots_content}'
        }
        static_spider = StaticSpider(
            urls=[url],
            parse=lambda x, y: None,
            config=Configuration(follow_robots_txt=True))

        assert value == await static_spider._get_request_delay(url)

    @respx.mock
    async def test_should_return_config_delay_when_follow_robots_txt_is_false(
            self):
        url = 'http://foo.com'
        request = respx.get(f'{url}/robots.txt') % {
            'text': 'User-agent:*\nDisallow: '
        }
        config = Configuration(min_request_delay=3, max_request_delay=3)
        static_spider = StaticSpider(urls=[url],
                                     parse=lambda x, y: None,
                                     config=config)

        assert not request.called
        assert 3 == await static_spider._get_request_delay(url)

    # simple test of run and statistics methods, more reliable tests are below

    @respx.mock
    # It is very weird, but when I try to combine this test with the next one
    # (to have only one test) I get a strange error related to mocking. This is
    # why there are two tests: one to check that exclusion works and another to
    # check the rest of the logic.
    async def test_should_exclude_url_when_robots_txt_excludes_it(self):
        url = 'http://foo.com'
        respx.get(f'{url}/robots.txt') % 401

        async def parse(*_) -> None:
            pass

        static_spider = StaticSpider(
            urls=[url],
            parse=parse,
            config=Configuration(follow_robots_txt=True))
        await static_spider.run()
        assert static_spider.reachable_urls == set()
        assert static_spider.robots_excluded_urls == {url}

    # trio is the only backend checked, for the same reason explained above for
    # test_should_fetch_content_when_giving_http_url
    @respx.mock
    @pytest.mark.parametrize('anyio_backend', ['trio'])
    async def test_should_return_correct_statistics_after_running_spider(
            self, anyio_backend):
        url1 = 'http://foo.com'
        respx.get(url1, path='/')
        respx.get(url1, path='/robots.txt') % 404

        async def parse(*_) -> None:
            pass

        static_spider = StaticSpider(
            urls=[url1],
            parse=parse,
            config=Configuration(follow_robots_txt=True))
        await static_spider.run()
        stats = static_spider.statistics()

        assert stats.reachable_urls == {url1}
        assert stats.unreachable_urls == set()
        assert stats.followed_urls == set()
        assert stats.robot_excluded_urls == set()
        assert stats.request_counter == 1
        assert stats.total_time > 0
        assert stats.average_fetch_time > 0
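The % operator used throughout these tests is respx's shorthand for attaching a response to a route: an int sets the status code, a dict passes response kwargs. A minimal self-contained sketch, assuming a recent respx:

import httpx
import respx

@respx.mock
def test_percent_shorthand():
    respx.get("http://foo.com/") % {"text": "content"}  # dict -> response kwargs
    respx.get("http://foo.com/robots.txt") % 404        # int -> status code

    assert httpx.get("http://foo.com/").text == "content"
    assert httpx.get("http://foo.com/robots.txt").status_code == 404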
Example #28
def test_cookies_pattern(lookup, cookies, request_cookies, expected):
    request = httpx.Request("GET",
                            "http://foo.bar/",
                            cookies=request_cookies,
                            json={"foo": "bar"})
    assert bool(Cookies(cookies, lookup=lookup).match(request)) is expected
Example #29
def build_github_app_request(self, method, url, force=False):
    headers = http.DEFAULT_CLIENT_OPTIONS["headers"].copy()
    headers["Authorization"] = f"Bearer {github_app.get_or_create_jwt(force)}"
    return httpx.Request(method, url, headers=headers)
Example #30
def request_base():
    request = httpx.Request("GET", "https://example.com")
    with httpx.Client() as client:
        response = client.send(request)
    return response
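A hedged variant of the same pattern with response streaming (stream= and iter_bytes() are standard httpx client API; the URL is the example's own):

import httpx

def request_streamed():
    request = httpx.Request("GET", "https://example.com")
    with httpx.Client() as client:
        response = client.send(request, stream=True)
        try:
            for chunk in response.iter_bytes():  # consume the body in chunks
                pass
        finally:
            response.close()  # always release the connection
    return response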