示例#1
0
def test_post():
    """POST form data and check the 'form' field of the JSON reply."""
    form_data = {"x": "1"}
    method = {"post": {"data": form_data}}
    target = URL('http://httpbin.org/post')
    url = target.update_fragment_dict(method=method)
    for readable in url.get():
        response = Response.from_readable(readable)
        reader = utf8_reader(response)
        reply = json.load(reader)
        # the reply's form must equal the data that was posted
        assert reply['form'] == form_data
示例#2
0
def test_warc_response():
    """Load the WARC fixture and check its URL, protocol, version and type."""
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/warc_response')
    )
    assert response.url == 'http://httpbin.org/get?this=that'
    assert response.warc_protocol == b'WARC'
    version = response.warc_version
    assert version == (1, 0)
    warc_type = response.warc_headers.get('warc-type')
    assert warc_type == 'response'
示例#3
0
def test_post():
    """Send a POST via the URL fragment and compare the returned form."""
    method = {"post": {"data": {"x": "1"}}}
    posted = method['post']['data']
    url = URL('http://httpbin.org/post').update_fragment_dict(method=method)
    for readable in url.get():
        text = utf8_reader(Response.from_readable(readable))
        body = json.load(text)
        # what comes back under 'form' must be what went out
        assert body['form'] == posted
示例#4
0
def test_get_gzip():
    """Fetch a gzip response undecoded and expect exactly one response.

    The request is made with ``decode_content=False``; the body is passed
    through ``decode`` here before being parsed as JSON.
    """
    url = 'http://httpbin.org/gzip'
    # Start the count at zero so that an empty result fails the final
    # assertion instead of raising UnboundLocalError on the loop variable.
    count = 0
    responses = URL(url).get(decode_content=False)
    for count, readable in enumerate(responses, start=1):
        response = Response.from_readable(readable)
        data = json.load(utf8_reader(decode(response)))
        assert data.get('gzipped')
    assert count == 1
示例#5
0
def test_warc_response():
    """Check URL and WARC metadata of the response built from the fixture."""
    fixture = resource_stream(__name__, 'fixtures/warc_response')
    warc = Response.from_readable(fixture)
    assert warc.url == 'http://httpbin.org/get?this=that'
    protocol = warc.warc_protocol
    assert protocol == b'WARC'
    assert warc.warc_version == (1, 0)
    headers = warc.warc_headers
    assert headers.get('warc-type') == 'response'
示例#6
0
def test_get_gzip():
    """Get the gzip endpoint without content decoding; expect one response.

    Each response body is run through ``decode`` and parsed as JSON, and
    the parsed document must have a truthy 'gzipped' entry.
    """
    url = 'http://httpbin.org/gzip'
    # Initialised up front: if nothing is yielded the test must fail on
    # the assertion below, not on an unbound loop variable.
    seen = 0
    for seen, readable in enumerate(
            URL(url).get(decode_content=False), start=1):
        response = Response.from_readable(readable)
        data = json.load(utf8_reader(decode(response)))
        assert data.get('gzipped')
    assert seen == 1
示例#7
0
def test_chained_does_seek_response():
    """Chain one extractor with itself and expect the same value twice."""
    fixture = resource_stream(__name__, 'fixtures/robots_txt')
    response = Response.from_readable(fixture)
    # the very same extractor appears twice in the chain
    extract = chained(extract_first_line, extract_first_line)
    first_line = (b'# /robots.txt\n', )
    # both runs give the first line because chained re-seeks to 0
    assert list(extract(response)) == [first_line, first_line]
示例#8
0
def test_parse_url_response():
    """Build a response from raw bytes and check the path from parse_url."""
    raw_parts = (
        b"HTTP/1.1 200 OK\r\n",
        b"X-wex-request-url: http://www.foo.com/a/b/c\r\n",
        b"Content-type: text/html\r\n",
        b"\r\n",
        b"<html><h1>hello</h1></html>",
    )
    response = Response.from_readable(BytesIO(b"".join(raw_parts)))
    parsed = parse_url(response)
    assert parsed.path == '/a/b/c'
示例#9
0
def test_extract_from_readable():
    """Extract the body of a minimal response whose protocol is FTP/1.0."""

    def extract(src):
        body = src.read()
        yield (body, )

    raw = b'FTP/1.0 200 OK\r\n\r\nhello'
    extracted = Response.values_from_readable(extract, BytesIO(raw))
    assert list(extracted) == [(b'hello', )]
示例#10
0
def test_chained_does_seek_response():
    """Apply the same extractor twice through ``chained``."""
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/robots_txt')
    )
    # chain the extractor with itself
    extract = chained(extract_first_line, extract_first_line)
    results = [value for value in extract(response)]
    # the first line appears twice because chained re-seeks to 0
    expected = [(b'# /robots.txt\n',)] * 2
    assert results == expected
示例#11
0
def stream_from_fixture(fixture):
    """Return an ``HTMLStream`` over the named htmlstream fixture."""
    path = 'fixtures/htmlstream/' + fixture
    response = Response.from_readable(resource_stream(__name__, path))
    # read a little up front; this is just to make sure that we seek(0)
    response.read(100)
    return HTMLStream(response)
示例#12
0
def test_parse_next_decoder():
    """Parse the shift-jis fixture and check the text of the tree."""
    # this fixture is borrowed from htmlstream
    fixture = resource_stream(
        __name__, 'fixtures/htmlstream/shift-jis-next-decoder')
    response = Response.from_readable(fixture)
    # the parse function is expected to try utf-8 first and then shift-jis
    root = e.parse(response).getroot()
    text = list(root.itertext())
    assert text == ['\n', '巨', '\n']
示例#13
0
def test_parse_next_decoder():
    """Check the text nodes parsed from the shift-jis-next-decoder fixture."""
    # the fixture comes from the htmlstream tests
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/htmlstream/shift-jis-next-decoder')
    )
    # here parse will try utf-8 and then shift-jis
    etree = e.parse(response)
    pieces = [piece for piece in etree.getroot().itertext()]
    assert pieces == ['\n', '巨', '\n']
示例#14
0
def extract_with_monkeypatched_logging(monkeypatch, excluded=()):
    """Run the entry-point extractor with ``logging.getLogger`` replaced.

    Args:
        monkeypatch: object whose ``setattr`` is used to swap in the fake
            ``logging.getLogger`` (presumably pytest's fixture).
        excluded: not used by this function; kept so existing callers
            still work.  The default is an immutable tuple so that no
            mutable object is shared between calls.

    Returns:
        The ``FakeLogger`` whose ``getLogger`` replaced the real one.
    """
    logger = FakeLogger('wex.entrypoints')
    monkeypatch.setattr('logging.getLogger', logger.getLogger)
    extractor = extractor_from_entry_points()
    readable = resource_stream(__name__, 'fixtures/robots_txt')
    # Drain the generator; the extracted values themselves are discarded.
    for _ in Response.values_from_readable(extractor, readable):
        pass
    return logger
示例#15
0
def test_get_gzip():
    """Fetch the gzip endpoint undecoded and expect a single response.

    For each response the "X-wex-has-gzip-magic" header must be "1", and
    the decoded JSON body must have a truthy "gzipped" entry.
    """
    url = "http://httpbin.org/gzip"
    # Pre-set the counter so an empty iteration fails the last assertion
    # rather than raising UnboundLocalError on the loop variable.
    count = 0
    responses = URL(url).get(decode_content=False)
    for count, readable in enumerate(responses, start=1):
        response = Response.from_readable(readable)
        assert response.headers.get("X-wex-has-gzip-magic") == "1"
        data = json.load(utf8_reader(decode(response)))
        assert data.get("gzipped")
    assert count == 1
示例#16
0
def get(url, **kw):
    """Fetch ``url`` and return the code of every response, in order.

    Keyword arguments are passed straight to ``URL(url).get``.  Every
    response with code 200 must decode to JSON containing 'headers'.
    """
    seen_codes = []
    for readable in URL(url).get(**kw):
        response = Response.from_readable(readable)
        seen_codes.append(response.code)
        if response.code != 200:
            continue
        reader = utf8_reader(decode(response))
        payload = json.load(reader)
        assert 'headers' in payload
    return seen_codes
示例#17
0
def get(url, **kw):
    """Return the list of response codes seen while getting ``url``.

    ``kw`` is forwarded to ``URL(url).get``; bodies of responses with
    code 200 are decoded, parsed as JSON and checked for a 'headers' key.
    """
    statuses = []
    fetched = URL(url).get(**kw)
    for readable in fetched:
        response = Response.from_readable(readable)
        statuses.append(response.code)
        is_ok = response.code == 200
        if is_ok:
            decoded = decode(response)
            document = json.load(utf8_reader(decoded))
            assert 'headers' in document
    return statuses
示例#18
0
def test_extractor_from_entry_points_hostname_suffix_excluded():
    """For www.foo.com, the hostname-suffix example must not be selected."""
    import testme
    extractor = extractor_from_entry_points()
    fixture = resource_stream(__name__, 'fixtures/robots_txt')
    # exhaust the generator; the extracted values are not inspected
    for _ in Response.values_from_readable(extractor, fixture):
        pass
    hostname = 'www.foo.com'
    known_hosts = list(extractor.extractors.keys())
    assert known_hosts == [hostname]
    selected = set(extractor.extractors[hostname].extractors)
    assert testme.example_with_hostname_suffix not in selected
    assert testme.example in selected
示例#19
0
def test_parse_url_response():
    """Parse the URL of a hand-written response and check its path."""
    raw = (
        b"HTTP/1.1 200 OK\r\n"
        b"X-wex-request-url: http://www.foo.com/a/b/c\r\n"
        b"Content-type: text/html\r\n"
        b"\r\n"
        b"<html><h1>hello</h1></html>"
    )
    readable = BytesIO(raw)
    response = Response.from_readable(readable)
    url = parse_url(response)
    assert url.path == '/a/b/c'
示例#20
0
def test_extractor_from_entry_points():
    """Both testme examples must be among the httpbin.org extractors."""
    import testme
    extract = extractor_from_entry_points()
    fixture = resource_stream(__name__, 'fixtures/get_this_that')
    # run the extractor to completion, ignoring what it yields
    for _ in Response.values_from_readable(extract, fixture):
        pass
    hostname = 'httpbin.org'
    assert list(extract.extractors.keys()) == [hostname]
    found = set(extract.extractors[hostname].extractors)
    wanted = {testme.example, testme.example_with_hostname_suffix}
    assert wanted.issubset(found)
示例#21
0
def test_get_with_params():
    """Params given at ``get`` time reach the server but not the saved URLs.

    The JSON reply must list the params under 'args', while the
    response's ``request_url`` stays the bare URL and its ``url`` does
    not contain the token value.
    """
    url = 'http://httpbin.org/get'
    # The use case for adding params at '.get' time is for handling
    # authentication tokens to URLs.  The net effect is that the
    # tokens are not saved in the Wex response which is a way of
    # avoiding sharing your access tokens.
    params = {'token': 'secret'}
    for readable in URL(url).get(params=params):
        response = Response.from_readable(readable)
        data = json.load(utf8_reader(response))
        assert data.get('args') == params
        assert response.request_url == url
        # idiomatic membership test ('not in' rather than 'not ... in')
        assert 'secret' not in response.url
示例#22
0
def test_get_with_params():
    """Check that ``get(params=...)`` sends params without recording them.

    'args' in the JSON reply must equal the params; ``request_url`` must
    equal the original URL and ``url`` must not contain the token value.
    """
    url = 'http://httpbin.org/get'
    # The use case for adding params at '.get' time is for handling
    # authentication tokens to URLs.  The net effect is that the
    # tokens are not saved in the Wex response which is a way of
    # avoiding sharing your access tokens.
    params = {'token': 'secret'}
    for readable in URL(url).get(params=params):
        response = Response.from_readable(readable)
        data = json.load(utf8_reader(response))
        assert data.get('args') == params
        assert response.request_url == url
        # 'x not in y' is the idiomatic spelling of 'not x in y'
        assert 'secret' not in response.url
示例#23
0
def build_response(content, response=with_content_length, **kw):
    """Format a raw response around ``content`` and build a Response.

    Args:
        content: body substituted into the template.
        response: template string formatted with the fields below.
        **kw: any of 'content', 'content_length', 'protocol_version',
            'code' or 'reason' overrides that template field; all other
            keywords are passed on to ``Response.from_readable``.

    Returns:
        The result of ``Response.from_readable`` on the UTF-8 encoded text.
    """
    # The formatted template is encoded as UTF-8 below, so the default
    # length has to count encoded bytes: len() of a text string counts
    # characters, which is too small for non-ASCII content.
    if isinstance(content, bytes) or not hasattr(content, 'encode'):
        content_length = len(content)
    else:
        content_length = len(content.encode('utf-8'))

    fmt = {
        'content': content,
        'content_length': content_length,
        'protocol_version': 'HTTP/1.1',
        'code': 200,
        'reason': 'OK',
    }

    # Template fields are taken out of kw so that they are not also
    # passed to Response.from_readable.
    for k in fmt:
        if k in kw:
            fmt[k] = kw.pop(k)

    response_bytes = response.format(**fmt).encode('utf-8')
    return Response.from_readable(BytesIO(response_bytes), **kw)
示例#24
0
def build_response(content, response=with_content_length, **kw):
    """Render the response template for ``content`` and parse the result.

    Args:
        content: body placed into the template.
        response: template string; formatted with 'content',
            'content_length', 'protocol_version', 'code' and 'reason'.
        **kw: a keyword named like a template field replaces its default;
            anything else is forwarded to ``Response.from_readable``.

    Returns:
        Whatever ``Response.from_readable`` returns for the encoded text.
    """
    # Count bytes, not characters: the rendered text is UTF-8 encoded
    # before parsing, and the two differ for non-ASCII text content.
    if isinstance(content, bytes) or not hasattr(content, 'encode'):
        content_length = len(content)
    else:
        content_length = len(content.encode('utf-8'))

    fmt = {
        'content': content,
        'content_length': content_length,
        'protocol_version': 'HTTP/1.1',
        'code': 200,
        'reason': 'OK',
    }

    # Pop overrides out of kw so only non-template keywords are forwarded.
    for k in fmt:
        if k in kw:
            fmt[k] = kw.pop(k)

    response_bytes = response.format(**fmt).encode('utf-8')
    return Response.from_readable(BytesIO(response_bytes), **kw)
示例#25
0
def create_response(data):
    """Wrap ``data`` in a ``BytesIO`` and build a ``Response`` from it."""
    readable = BytesIO(data)
    return Response.from_readable(readable)
示例#26
0
def test_parse_obj():
    """``parse_url`` accepts any object that carries a ``url`` attribute."""

    class Response(object):
        # a stand-in with nothing but a url attribute
        url = url1

    obj = Response()
    parsed = parse_url(obj)
    assert parsed == url1_parsed
示例#27
0
def test_get_with_context():
    """Context given to ``get`` must appear as an X-wex-context-foo header."""
    context = {'foo': 'bar'}
    url = 'http://httpbin.org/headers'
    for readable in URL(url).get(context=context):
        response = Response.from_readable(readable)
        header = response.headers.get('X-wex-context-foo')
        assert header == 'bar'
示例#28
0
def stream_from_fixture(fixture):
    """Return an ``HTMLStream`` over the given htmlstream fixture."""
    path = "fixtures/htmlstream/" + fixture
    response = Response.from_readable(resource_stream(__name__, path))
    return HTMLStream(response)
示例#29
0
def test_undecodable_url():
    """Check ``url`` and ``request_url`` read from the undecodable-URL fixture."""
    fixture = resource_stream(__name__, 'fixtures/undecodable_url.wexin_')
    response = Response.from_readable(fixture)
    expected_url = 'https://www.example.net/Ã'
    expected_request_url = 'https://www.example.net/Ã#{"method":"get"}'
    assert response.url == expected_url
    assert response.request_url == expected_request_url
示例#30
0
def test_extract_from_readable():
    """Run a read-everything extractor over a tiny FTP/1.0 response."""

    def extract(src):
        content = src.read()
        yield (content,)

    source = BytesIO(b'FTP/1.0 200 OK\r\n\r\nhello')
    results = [v for v in Response.values_from_readable(extract, source)]
    assert results == [(b'hello',)]
示例#31
0
def test_undecodable_url():
    """The undecodable-URL fixture must give the expected URL strings."""
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/undecodable_url.wexin_')
    )
    url = response.url
    assert url == 'https://www.example.net/Ã'
    request_url = response.request_url
    assert request_url == 'https://www.example.net/Ã#{"method":"get"}'
示例#32
0
def test_warc_response():
    """The response built from the WARC fixture must report the right URL."""
    fixture = resource_stream(__name__, 'fixtures/warc_response')
    url = Response.from_readable(fixture).url
    assert url == 'http://httpbin.org/get?this=that'
示例#33
0
def test_bug_cp1252():
    """Parse the bug_cp1252 fixture and check the title text."""
    fixture = resource_stream(__name__, 'fixtures/bug_cp1252')
    response = Response.from_readable(fixture)
    titles = e.parse(response).xpath('//title/text()')
    assert titles == ['Problem²']
示例#34
0
def test_get_with_context():
    """Each response must carry the context value in X-wex-context-foo."""
    fetched = URL('http://httpbin.org/headers').get(context={'foo': 'bar'})
    for readable in fetched:
        headers = Response.from_readable(readable).headers
        assert headers.get('X-wex-context-foo') == 'bar'
示例#35
0
def test_bug_cp1252():
    """The title of the bug_cp1252 fixture must come out as 'Problem²'."""
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/bug_cp1252')
    )
    tree = e.parse(response)
    title_text = tree.xpath('//title/text()')
    assert title_text == ['Problem²']
示例#36
0
def create_response(data):
    """Build a ``Response`` from ``data`` held in an in-memory stream."""
    stream = BytesIO(data)
    response = Response.from_readable(stream)
    return response
示例#37
0
def response(resource):
    """Build a ``Response`` from the named resource of this package."""
    stream = resource_stream(__name__, resource)
    return Response.from_readable(stream)
示例#38
0
def test_get_with_context():
    """The 'foo' context entry must come back as the X-wex-context-foo header."""
    url = "http://httpbin.org/headers"
    context = {"foo": "bar"}
    for readable in URL(url).get(context=context):
        response = Response.from_readable(readable)
        value = response.headers.get("X-wex-context-foo")
        assert value == "bar"