def test_href_url_same_suffix():
    """Check the URLs produced by ``e.href_url_same_suffix`` for ``#links a``.

    The pipeline result must be iterable without being a ``list``; once
    materialised it must contain exactly the two expected absolute URLs.
    """
    pipeline = e.css('#links a') | e.href_url_same_suffix
    urls = pipeline(create_response(example))

    # The result should be some non-list iterable; verify that before
    # consuming it.
    assert hasattr(urls, '__iter__')
    assert not isinstance(urls, list)

    collected = list(urls)
    assert collected == ['http://base.com/1', 'http://subdomain.base.com/2']
def test_text_content_with_br():
    """``e.text_content`` on ``#br`` gives 'oh' and 'my' joined by a newline."""
    pipeline = e.css('#br') | e.text_content
    extracted = pipeline(create_response(example))
    assert extracted == ['oh\nmy']
def test_text():
    """``e.text`` applied to the ``h1`` selection yields the single text 'hi'."""
    pipeline = e.css('h1') | e.text | list
    texts = pipeline(create_response(example))
    assert texts == ['hi']
def test_href_url_single():
    """``e.href_url`` fed one element (via ``item0``) returns one URL string."""
    pipeline = e.css('#div1 a') | item0 | e.href_url
    url = pipeline(create_response(example))
    assert url == 'http://base.com/1'
def test_img_src():
    """``e.src_url`` over ``img`` elements yields the expected source URL.

    The result must be a non-list iterable whose contents equal the single
    expected URL.
    """
    pipeline = e.css('img') | e.src_url
    sources = pipeline(create_response(example))

    # Type-shape checks come first, before the iterable is consumed.
    assert hasattr(sources, '__iter__')
    assert not isinstance(sources, list)

    collected = list(sources)
    assert collected == ['http://other.com/src']
def test_attrib():
    """``e.attrib('href')`` returns the attribute values untouched.

    The expected values keep their surrounding whitespace, and the third
    anchor produces ``None``.
    """
    pipeline = e.css('#div1 a') | e.attrib('href') | list
    response = create_response(example)
    hrefs = pipeline(response)
    assert hrefs == ['/1', ' /2 ', None]
def test_css():
    """``e.css('h1')`` returns a ``list`` holding exactly one ``h1`` element."""
    select_h1 = e.css('h1')
    page = create_response(example)
    matches = select_h1(page)

    assert isinstance(matches, list)
    tags = [node.tag for node in matches]
    assert tags == ['h1']
def test_normalize_space_nbsp():
    """``e.normalize_space`` on ``#nbsp`` yields a single empty string."""
    pipeline = e.css('#nbsp') | e.normalize_space
    normalized = pipeline(create_response(example))
    assert normalized == ['']
def test_attrib_default():
    """``e.attrib`` falls back to the given default for a missing attribute."""
    pipeline = e.css('#div1 a') | e.attrib('nosuch', '') | list
    values = pipeline(create_response(example))
    assert values == ['', '', '']
def test_css_called_twice():
    """Calling the same ``e.css`` selector twice inside ``Cache`` gives equal results."""
    # NOTE(review): a second function with this exact name is defined later
    # in this module; the later definition replaces this one at import time,
    # so only one of the two is collected by pytest. Confirm whether one of
    # them should be renamed or removed.
    select_h1 = e.css('h1')
    page = create_response(example)
    with Cache():
        first_call = select_h1(page)
        second_call = select_h1(page)
        assert first_call == second_call
def test_list_normalize_space():
    """``e.normalize_space`` over every ``ul li`` gives whitespace-free strings."""
    pipeline = e.css('ul li') | e.normalize_space
    items = pipeline(create_response(example))
    assert items == ['1', '', '2']
def test_itertext():
    """``e.itertext()`` over all ``.thing`` elements, flattened, yields each text chunk."""
    pipeline = e.css('.thing') | e.itertext() | flatten | list
    wanted = [
        'First ',
        'one thing',
        'then ',
        'another thing',
        '.',
    ]
    chunks = pipeline(create_response(example))
    assert chunks == wanted
def test_href_empty():
    """``e.href_url`` applied to the ``#nosuch`` selection yields nothing."""
    pipeline = e.css('#nosuch') | e.href_url | list
    urls = pipeline(create_response(example))
    assert urls == []
def test_css_called_twice():
    """Two calls of one ``e.css`` selector under ``Cache`` compare equal."""
    # NOTE(review): an earlier function in this module has this same name;
    # this definition replaces it at import time, so the earlier one is
    # never collected by pytest. Confirm whether that is intended.
    selector = e.css('h1')
    resp = create_response(example)
    with Cache():
        result_a = selector(resp)
        result_b = selector(resp)
        assert result_a == result_b
def test_nbsp():
    """``e.itertext()`` on ``#nbsp`` yields one string of two non-breaking spaces."""
    pipeline = e.css('#nbsp') | e.itertext() | list
    chunks = pipeline(create_response(example))
    assert chunks == [u'\xa0\xa0']
def test_list_text_content():
    """``e.text_content`` over ``ul li`` keeps leading and trailing spaces."""
    pipeline = e.css('ul li') | e.text_content
    contents = pipeline(create_response(example))
    assert contents == [' 1', '', '2 ']
def test_href_when_url_contains_dodgy_characters():
    """``e.href_url`` resolves links for the ``example_with_dodgy_url`` fixture."""
    pipeline = e.css('a') | e.href_url | list
    response = create_response(example_with_dodgy_url)
    # This will fail if we don't quote/unquote the base_url
    resolved = pipeline(response)
    assert resolved == ['http://foo.com/1']
def test_itertext_elem():
    """``e.itertext()`` given one element (via ``first``) yields only its text chunks."""
    pipeline = e.css('.thing') | first | e.itertext() | list
    wanted = ['First ', 'one thing']
    chunks = pipeline(create_response(example))
    assert chunks == wanted
def test_href_when_url_contains_non_ascii_characters():
    """``e.href_url`` resolves links for the ``example_with_non_ascii_url`` fixture."""
    pipeline = e.css('a') | e.href_url | list
    response = create_response(example_with_non_ascii_url)
    resolved = pipeline(response)
    assert resolved == ['http://foo.com/bar™/1']
def test_drop_tree():
    """``e.drop_tree`` removes ``script`` subtrees before the string value is taken.

    After dropping the elements matched by ``e.css('script')`` from the
    ``#drop-tree`` node, its XPath ``string()`` value must be the plain
    sentence alone.
    """
    select_target = e.xpath('//*[@id="drop-tree"]')
    strip_scripts = e.drop_tree(e.css('script'))
    to_string = e.xpath('string()')
    pipeline = select_target | strip_scripts | to_string

    outcome = pipeline(create_response(example))
    assert outcome == ['Drop this please.']