Python TextResponse._set_body示例

编程语言: Python

命名空间/包名称: scrapy.http.response.text

类/类型: TextResponse

方法/功能: _set_body

hotexamples.com的示例: 3

Python TextResponse._set_body - 共找到 3 个示例。这些是从开源项目中提取的、评价最高的 scrapy.http.response.text.TextResponse._set_body 真实 Python 示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

TextResponse(17)

_set_body(3)

css(1)

示例#1

显示文件

文件： test_parser.py 项目： GregoryVigoTorres/spelt

def test_skip_elems_by_selector():
    """Check the spider output when ``.skip-elem`` is an excluded selector.

    Feeds ``test/data/skip_by_selector.html`` to the spider and compares the
    ``text`` of the first yielded item against
    ``test/data/skip_by_selector.txt``.

    Still to be covered (from the original note): nested elements that should
    have been removed, i.e. whether a ``class='link'`` element should be
    skipped when it sits inside a parent element that is not skipped.
    """
    spider = serialize.SerializeSpider()
    fake_response = TextResponse(encoding='utf-8',
                                 url='https://doc.scrapy.org')

    spider.exclude_selectors = ['.skip-elem']

    # The fixture is read as raw bytes; the response decodes it using the
    # encoding passed to the constructor above.
    with open('test/data/skip_by_selector.html', mode='rb') as fd:
        fake_response._set_body(fd.read())

    item = spider.parse(fake_response)
    text = next(item).get('text').strip()

    # Read the expected text with the same encoding the response declares,
    # rather than relying on the platform's default locale encoding.
    with open('test/data/skip_by_selector.txt', encoding='utf-8') as fd:
        test_text = fd.read().strip()

    # Replace every "newline + one whitespace character" pair with a bare
    # newline, then strip both ends, so minor whitespace differences between
    # the spider output and the fixture do not fail the comparison.
    # The pattern is a raw string so that ``\s`` reaches the regex engine
    # without triggering Python's invalid-escape-sequence warning.
    text = re.sub(r'\n\s', '\n', text).strip()
    test_text = re.sub(r'\n\s', '\n', test_text).strip()

    assert text == test_text

示例#2

显示文件

文件： test_parser.py 项目： GregoryVigoTorres/spelt

def test_block_elem_with_children():
    """Check the spider output for two block elements that have children.

    Feeds ``test/data/block_elem.html`` to the spider and compares the
    ``text`` of the first yielded item against ``test/data/block_elem.txt``
    after normalising whitespace that follows newlines.
    """
    spider = serialize.SerializeSpider()

    fake_response = TextResponse(encoding='utf-8',
                                 url='https://doc.scrapy.org')

    # The fixture is read as raw bytes; the response decodes it using the
    # encoding passed to the constructor above.
    with open('test/data/block_elem.html', mode='rb') as fd:
        fake_response._set_body(fd.read())

    item = spider.parse(fake_response)
    text = next(item).get('text')

    # Read the expected text with the same encoding the response declares,
    # rather than relying on the platform's default locale encoding.
    with open('test/data/block_elem.txt', encoding='utf-8') as fd:
        test_text = fd.read()

    # Replace every "newline + one whitespace character" pair with a bare
    # newline, then strip both ends, so minor whitespace differences between
    # the spider output and the fixture do not fail the comparison.
    # The pattern is a raw string so that ``\s`` reaches the regex engine
    # without triggering Python's invalid-escape-sequence warning.
    text = re.sub(r'\n\s', '\n', text).strip()
    test_text = re.sub(r'\n\s', '\n', test_text).strip()

    assert text == test_text

示例#3

显示文件

文件： test_parser.py 项目： GregoryVigoTorres/spelt

def test_skip_elems_by_tag():
    """Check the spider output when elements are excluded by tag name.

    Adds ``footer`` to the spider's ``exclude_tags`` (on top of whatever the
    spider already excludes, e.g. script), feeds
    ``test/data/skip_by_tag.html`` to it and compares the ``text`` of the
    first yielded item against ``test/data/skip_by_tag.txt``.
    """
    spider = serialize.SerializeSpider()
    fake_response = TextResponse(encoding='utf-8',
                                 url='https://doc.scrapy.org')

    # NOTE(review): this mutates the list in place; if ``exclude_tags`` is a
    # class-level attribute the change would leak into other tests — confirm.
    spider.exclude_tags.append('footer')

    # The fixture is read as raw bytes; the response decodes it using the
    # encoding passed to the constructor above.
    with open('test/data/skip_by_tag.html', mode='rb') as fd:
        fake_response._set_body(fd.read())

    item = spider.parse(fake_response)
    text = next(item).get('text').strip()

    # Read the expected text with the same encoding the response declares,
    # rather than relying on the platform's default locale encoding.
    with open('test/data/skip_by_tag.txt', encoding='utf-8') as fd:
        test_text = fd.read().strip()

    # Replace every "newline + one whitespace character" pair with a bare
    # newline, then strip both ends, so minor whitespace differences between
    # the spider output and the fixture do not fail the comparison.
    # The pattern is a raw string so that ``\s`` reaches the regex engine
    # without triggering Python's invalid-escape-sequence warning.
    text = re.sub(r'\n\s', '\n', text).strip()
    test_text = re.sub(r'\n\s', '\n', test_text).strip()

    assert text == test_text