def test_parse_next_decoder():
    """Parse a Shift-JIS fixture and check the text nodes of the result."""
    # The fixture is borrowed from the htmlstream tests.
    fixture = 'fixtures/htmlstream/shift-jis-next-decoder'
    response = Response.from_readable(resource_stream(__name__, fixture))
    # Here the parse function will try utf-8 and then shift-jis.
    root = e.parse(response).getroot()
    text_nodes = list(root.itertext())
    assert text_nodes == ['\n', '巨', '\n']
def test_phantomjs_using_proxies():
    """Fetch through an HttpProxy and check the request the proxy recorded."""
    headings = []
    with HttpProxy() as proxy:
        readables = url.get(proxies=proxy.proxies)
        for response in map(Response.from_readable, readables):
            document = parse(response)
            headings += document.xpath("//h1")
    # Exactly one <h1> is expected across all responses.
    assert len(headings) == 1
    expected_requests = [b"GET http://httpbin.org/html HTTP/1.1"]
    assert proxy.requests == expected_requests
def test_parse_ioerror():
    """A response whose ``read`` raises IOError parses to ``e.UNPARSEABLE``."""

    class ProblemResponse(object):
        """Response stand-in with empty headers whose ``read`` always fails."""

        def __init__(self):
            self.headers = parse_headers(BytesIO())
            self.url = None

        def read(self, *args):
            raise IOError

    broken = ProblemResponse()
    root = e.parse(broken).getroot()
    assert root is e.UNPARSEABLE
def test_phantomjs():
    """Fetch via phantomjs; check the context header and bounding-rect attributes."""
    headings = []
    context = {"foo": "bar"}
    readables = url.get(context=context)
    for response in map(Response.from_readable, readables):
        document = parse(response)
        headings += document.xpath("//h1")
        # The context passed to ``url.get`` is expected back as a header.
        assert response.headers.get("X-wex-context-foo") == "bar"
    assert len(headings) == 1
    attributes = headings[0].attrib
    for name in ("bcr-left", "bcr-top", "bcr-right", "bcr-bottom"):
        assert name in attributes
def test_parse_ioerror():
    """A seekable response whose ``read`` raises IOError parses to ``e.UNPARSEABLE``."""

    class ProblemResponse(object):
        """Response stand-in: empty headers, no-op ``seek``, failing ``read``."""

        def __init__(self):
            self.headers = parse_headers(BytesIO())
            self.url = None

        def read(self, *args):
            raise IOError

        def seek(self, *args):
            pass

    broken = ProblemResponse()
    root = e.parse(broken).getroot()
    assert root is e.UNPARSEABLE
def extract(response):
    """Yield ``(label, value)`` pairs extracted from a parsed response.

    The generator yields "name" first and then raises ZeroDivisionError
    while computing the "whoops" value, so the "country" and "region"
    pairs are never reached.
    """
    document = parse(response)
    heading = text(document.xpath('//h1/text()'))
    yield "name", heading
    # NOTE(review): presumably a deliberate failure for error-handling tests.
    yield "whoops", 1/0
    for label, query in (
        ("country", '//dd[@id="country"]'),
        ("region", '//dd[@id="region"]'),
    ):
        yield label, text(document.xpath(query))
def test_parse_unreadable():
    """``e.parse`` returns a plain ``object()`` instance unchanged."""
    sentinel = object()
    result = e.parse(sentinel)
    assert result is sentinel
def test_bug_cp1252():
    """Parse the ``bug_cp1252`` fixture and check its decoded title text."""
    fixture = resource_stream(__name__, 'fixtures/bug_cp1252')
    document = e.parse(Response.from_readable(fixture))
    titles = document.xpath('//title/text()')
    assert titles == ['Problem²']
def test_parse():
    """Parsing the example response yields a single ``<h1>`` text of 'hi'."""
    response = create_response(example)
    document = e.parse(response)
    headings = document.xpath('//h1/text()')
    assert headings == ['hi']
def test_get_base_url():
    """``e.get_base_url`` returns 'http://base.com/' for the example document."""
    document = e.parse(create_response(example))
    assert e.get_base_url(document) == 'http://base.com/'