Пример #1
0
def test_re_field_get_nothing_with_no_default():
    """Expect ``NothingMatchedError`` when nothing matches and no default is set."""
    unmatched_field = RegexField(re_select='nothing to match.')
    try:
        unmatched_field.extract(html=HTML)
    except NothingMatchedError:
        # Expected outcome: the pattern found nothing and there is no fallback.
        return
    # extract() returned normally, which this test treats as a failure.
    raise AssertionError
Пример #2
0
def test_re_field_in_dict_format_with_many():
    """Check ``many=True`` extraction with named groups against the HTML fixture.

    Five matches are expected; the first and last are looked up by group
    name (``href`` and ``text``).
    """
    link_pattern = '<a class="test_link" href="(?P<href>.*?)">(?P<text>.*?)</a>'
    link_field = RegexField(re_select=link_pattern, many=True)
    found = link_field.extract(html=HTML)
    assert len(found) == 5
    # Only the boundary entries are verified: index 0 and index 4.
    for position, expected_text in ((0, 'hello1 github.'), (4, 'hello5 github.')):
        entry = found[position]
        assert entry['href'] == 'https://github.com/howie6879/'
        assert entry['text'] == expected_text
Пример #3
0
def test_re_field_with_many():
    """Check ``many=True`` extraction with two unnamed groups.

    Five matches are expected; the first and last are unpacked as
    ``(href, text)`` pairs and compared with the fixture values.
    """
    link_pattern = '<a class="test_link" href="(.*?)">(.*?)</a>'
    expected_href = 'https://github.com/howie6879/'

    results = RegexField(re_select=link_pattern, many=True).extract(html=HTML)
    assert len(results) == 5

    # Unpacking also requires each inspected match to hold exactly two items.
    first_href, first_text = results[0]
    last_href, last_text = results[4]

    assert first_href == expected_href
    assert first_text == 'hello1 github.'
    assert last_href == expected_href
    assert last_text == 'hello5 github.'
Пример #4
0
def test_re_field_with_html_element():
    """Extract named groups when ``html`` is the ``html_etree`` fixture."""
    heading_field = RegexField(
        re_select='<h1><a href="(?P<href>.*?)">(?P<text>.*?)</a></h1>',
    )
    extracted = heading_field.extract(html=html_etree)
    # Each named group is checked in turn: href first, then text.
    for group_name, expected in (("href", "https://github.com"), ("text", "Github")):
        assert extracted[group_name] == expected
Пример #5
0
def test_re_field_get_nothing_with_no_default():
    """Expect ``NothingMatchedError`` when nothing matches and no default is set.

    The test fails explicitly if ``extract()`` returns without raising, so a
    silent success cannot slip through. Any other exception type propagates
    unchanged and keeps its own traceback.
    """
    field = RegexField(re_select="nothing to match.")
    try:
        field.extract(html=HTML)
    except NothingMatchedError:
        # Expected outcome: no match and no default to fall back on.
        pass
    else:
        raise AssertionError("extract() did not raise NothingMatchedError")
Пример #6
0
def test_re_field_with_default():
    """A non-matching pattern with a ``default`` should yield that default."""
    fallback = "default value"
    field_with_fallback = RegexField(re_select="nothing to match.", default=fallback)
    assert field_with_fallback.extract(html=HTML) == fallback
Пример #7
0
def test_re_field_with_many_groups():
    """A pattern with two unnamed groups should unpack into ``(href, text)``."""
    heading_pattern = '<h1><a href="(.*?)">(.*?)</a></h1>'
    captured = RegexField(re_select=heading_pattern).extract(html=HTML)
    # Unpacking requires the result to contain exactly two items.
    link_target, link_label = captured
    assert link_target == "https://github.com"
    assert link_label == "Github"
Пример #8
0
def test_re_field_with_no_group():
    """A pattern without groups should yield the whole matched text."""
    title_field = RegexField(re_select="<title>.*?</title>")
    whole_match = title_field.extract(html=HTML)
    assert whole_match == "<title>ruia</title>"
Пример #9
0
def test_re_field_with_one_group():
    """A pattern with a single group should yield that group's text."""
    title_field = RegexField(re_select="<title>(.*?)</title>")
    title_text = title_field.extract(html=HTML)
    assert title_text == "ruia"
Пример #10
0
def test_re_field_with_html_element():
    """Named-group extraction should work when given the ``html_etree`` fixture."""
    anchor_pattern = '<h1><a href="(?P<href>.*?)">(?P<text>.*?)</a></h1>'
    anchor_field = RegexField(re_select=anchor_pattern)
    groups = anchor_field.extract(html=html_etree)
    href_value = groups['href']
    assert href_value == 'https://github.com'
    text_value = groups['text']
    assert text_value == 'Github'