示例#1
0
def test_field_extract_without_default(element0, Extractor, expr):
    """Expect ``Field.extract`` to raise ``ExtractError`` for ``expr``.

    The raised error must list the field as its only extractor and carry
    the element that was passed to ``extract``.
    """
    field = Field(Extractor(expr))

    with pytest.raises(ExtractError) as excinfo:
        field.extract(element0)

    error = excinfo.value
    reported = error.extractors
    assert len(reported) == 1
    assert reported[0] is field
    assert error.element is element0
示例#2
0
def test_field_extract(element0, Extractor, expr, expect, build_first):
    """Check the extracted value and the ``built`` flags around ``extract``.

    A new field and its wrapped extractor start unbuilt; they must be built
    after an explicit ``build()`` (when ``build_first`` is set) and after
    ``extract`` in every case.
    """
    field = Field(Extractor(expr))
    assert not (field.built or field.extractor.built)

    if build_first:
        field.build()
        assert field.built and field.extractor.built

    assert expect == field.extract(element0)
    assert field.built and field.extractor.built
示例#3
0
def test_field_xpath_extract_result_not_list(element0, build_first):
    """Extract with an XPath ``normalize-space(...)`` expression.

    The field is expected to return the string ``"Title 1"`` for
    ``element0``, with the ``built`` flags set once building or extraction
    has happened.
    """
    field = Field(XPathExtractor("normalize-space(//div[@class='title'])"))
    assert not (field.built or field.extractor.built)

    if build_first:
        field.build()
        assert field.built and field.extractor.built

    assert field.extract(element0) == "Title 1"
    assert field.built and field.extractor.built
示例#4
0
def test_field_extract_with_is_many(element0, Extractor, expr, expect,
                                    build_first):
    """Check extraction and ``built`` flags for a field with ``is_many=True``."""
    field = Field(Extractor(expr), is_many=True)
    assert not (field.built or field.extractor.built)

    if build_first:
        field.build()
        assert field.built and field.extractor.built

    assert expect == field.extract(element0)
    assert field.built and field.extractor.built
示例#5
0
    # Item describing a channel: each Field reads the text of one child
    # element through a relative XPath expression.
    # NOTE(review): the element names (pubDate, lastBuildDate, managingEditor,
    # webMaster) look like RSS 2.0 channel elements -- confirm against the
    # fixture document.
    class Channel(Item):
        title = Field(XPathExtractor("./title/text()"))
        link = Field(XPathExtractor("./link/text()"))
        description = Field(XPathExtractor("./description/text()"))
        language = Field(XPathExtractor("./language/text()"))
        publish_date = Field(XPathExtractor("./pubDate/text()"))
        last_build_date = Field(XPathExtractor("./lastBuildDate/text()"))
        docs = Field(XPathExtractor("./docs/text()"))
        generator = Field(XPathExtractor("./generator/text()"))
        managing_editor = Field(XPathExtractor("./managingEditor/text()"))
        web_master = Field(XPathExtractor("./webMaster/text()"))

        # Nested item applied to every "./item" child (is_many=True).
        items = ChannelItem(XPathExtractor("./item"), is_many=True)
示例#6
0
def test_field_overwrites_item_parameter_type_creation(stack_frame_support,
                                                       item_property):
    """Expect ``SyntaxError`` when ``type()`` builds an Item with ``item_property``.

    ``item_property`` is presumably the name of an attribute that ``Item``
    already defines -- confirm against the fixture.

    With ``stack_frame_support`` the error must point at the offending
    source line of this file; without it, location attributes are ``None``
    and ``text`` is a synthesized ``name=Field(...)`` snippet.

    NOTE: this test is layout-sensitive.  The ``type(...)`` call must stay
    on ONE line, exactly six lines above the ``exc.lineno`` assertion,
    because both the ``f_lineno - 6`` offset and the expected ``exc.text``
    describe that single line.  Do not let a formatter reflow it and do not
    insert lines between the call and the assertion.
    """
    with pytest.raises(SyntaxError) as catch:
        # fmt: off
        type("Parameter", (Item,), {item_property: Field(XPathExtractor("./span[@class='name']"))})  # noqa: E950
        # fmt: on

    exc = catch.value
    if stack_frame_support:
        assert exc.filename == __file__
        assert exc.lineno == inspect.currentframe().f_lineno - 6
        assert exc.offset == 8
        assert (exc.text == """
        type("Parameter", (Item,), {item_property: Field(XPathExtractor("./span[@class='name']"))})  # noqa: E950
        """.strip())
    else:
        assert exc.filename is None
        assert exc.lineno is None
        assert exc.offset is None
        assert (
            exc.text ==
            f"""{item_property}=Field(XPathExtractor("./span[@class='name']"))"""
        )
def test_field_with_convertor():
    """Check that ``convertor`` is applied with and without an explicit ``type``."""

    def shout(value):
        return str(value).upper()

    untyped = Field(D(), convertor=shout)
    assert untyped.type is None
    assert untyped.extract("abc") == "ABC"

    typed = Field(D(), type=str, convertor=shout)
    assert typed.type is str
    assert typed.extract("abc") == "ABC"
示例#8
0
def test_field_name_overwrite_item_parameter_type_creation():
    """Expect ``SyntaxError`` when ``type()`` builds an Item with a ``name`` field.

    The error must carry this file's name, the line and column of the
    ``type(...)`` call, and that line's source text.

    NOTE: this test is layout-sensitive.  The ``type(...)`` call must stay
    on ONE line, exactly five lines above the ``exc.lineno`` assertion,
    because both the ``f_lineno - 5`` offset and the expected ``exc.text``
    describe that single line.  Do not let a formatter reflow it and do not
    insert lines between the call and the assertion.
    """
    with pytest.raises(SyntaxError) as catch:
        # fmt: off
        type("Parameter", (Item,), {"name": Field(XPathExtractor("./span[@class='name']"))})
        # fmt: on

    exc = catch.value
    assert exc.filename == __file__
    assert exc.lineno == inspect.currentframe().f_lineno - 5
    assert exc.offset == 8
    assert (
        exc.text ==
        'type("Parameter", (Item,), {"name": Field(XPathExtractor("./span[@class=\'name\']"))})'
    )
示例#9
0
def test_field_extract_without_default(element0, Extractor, expr, build_first):
    """Expect ``ExtractError`` from ``extract`` and check the ``built`` flags.

    The field and its wrapped extractor start unbuilt and must be built
    after the failed extraction, whether or not ``build()`` was called
    first.  The error must list the field as its only extractor and carry
    the element that was passed in.
    """
    field = Field(Extractor(expr))
    assert not (field.built or field.extractor.built)

    if build_first:
        field.build()
        assert field.built and field.extractor.built

    with pytest.raises(ExtractError) as excinfo:
        field.extract(element0)

    assert field.built and field.extractor.built

    error = excinfo.value
    reported = error.extractors
    assert len(reported) == 1
    assert reported[0] is field
    assert error.element is element0
示例#10
0
 # Item with three fields, each read through a JSONExtractor expression
 # ("count_follower", "count_following", "count_like").
 class Count(Item):
     follower = Field(JSONExtractor("count_follower"))
     following = Field(JSONExtractor("count_following"))
     like = Field(JSONExtractor("count_like"))
示例#11
0
 # Item with two JSON-backed fields.  ``username`` is declared with
 # name="name"; presumably that is the key used for it in the extracted
 # result -- verify against Field's handling of ``name``.
 class User(Item):
     uid = Field(JSONExtractor("id"))
     username = Field(JSONExtractor("name"), name="name")
示例#12
0
        # Class body that first defines a method ``baz`` and then rebinds the
        # same name to a Field, so the Field is the value left in the class
        # namespace.  The ``noqa: F811`` silences the redefinition lint.
        class User(Item):
            def baz(self):
                pass

            baz = Field(JSONExtractor("baz"))  # noqa: F811
示例#13
0
    # Item whose Field is stored under the attribute ``baz_`` but declared
    # with name="baz", alongside a regular method that is itself called
    # ``baz``.
    class User(Item):  # noqa: F811
        baz_ = Field(JSONExtractor("baz"), name="baz")

        def baz(self):
            pass
示例#14
0
def test_field_parameters_conflict():
    """Passing ``is_many=True`` together with ``default`` must raise ``ValueError``."""
    conflicting = {"is_many": True, "default": None}
    with pytest.raises(ValueError):
        Field(TextCSSExtractor(".nomatter"), **conflicting)
示例#15
0
 # Subclass of ``User`` that declares one more JSON-backed field, ``gender``.
 class UserWithGender(User):
     gender = Field(JSONExtractor("gender"))
示例#16
0
 # Item with three JSON-backed fields.  ``username`` is declared with
 # name="name", and ``gender`` is given default=None.
 class User(Item):
     uid = Field(JSONExtractor("id"))
     username = Field(JSONExtractor("name"), name="name")
     gender = Field(JSONExtractor("gender"), default=None)
示例#17
0
 # Item for a response holding a list of users: ``start`` (default=0),
 # ``size`` and ``total`` are plain fields, and ``data`` applies the nested
 # ``User`` item to the "users[*]" expression with is_many=True.
 class UserResponse(Item):
     start = Field(JSONExtractor("start"), default=0)
     size = Field(JSONExtractor("size"))
     total = Field(JSONExtractor("total"))
     data = User(JSONExtractor("users[*]"), is_many=True)
示例#18
0
 # Item declaring a single field whose attribute name is ``field_names``.
 # NOTE(review): presumably this collides with an attribute the Item base
 # class already uses -- confirm against the surrounding test.
 class User(Item):
     field_names = Field(JSONExtractor("field_names"))
示例#19
0
 # Item for one channel entry; every field reads the text of a child
 # element through a relative XPath.  ``title`` and ``link`` are given
 # default=""; the other fields have no default.
 class ChannelItem(Item):
     title = Field(XPathExtractor("./title/text()"), default="")
     link = Field(XPathExtractor("./link/text()"), default="")
     description = Field(XPathExtractor("./description/text()"))
     publish_date = Field(XPathExtractor("./pubDate/text()"))
     guid = Field(XPathExtractor("./guid/text()"))
示例#20
0
 # Item with two fields, read from the text of the child <div> elements
 # with class "title" and class "content".
 class Article(Item):
     title = Field(XPathExtractor("./div[@class='title']/text()"))
     content = Field(XPathExtractor("./div[@class='content']/text()"))
示例#21
0
 # Item whose field is stored under the attribute ``field_names_`` (with a
 # trailing underscore) but declared with name="field_names".
 class User(Item):  # noqa: F811
     field_names_ = Field(JSONExtractor("field_names"), name="field_names")
示例#22
0
 # Item combining the nested ``User`` item, applied to "users" with
 # is_many=True, and a ``count`` field that is given default=0.
 class Users(Item):
     users = User(JSONExtractor("users"), is_many=True)
     count = Field(JSONExtractor("count"), default=0)
示例#23
0
 # Item with an ``id`` field and a field stored under the attribute
 # ``name_`` (with a trailing underscore) but declared with name="name".
 class User(Item):
     id = Field(JSONExtractor("id"))
     name_ = Field(JSONExtractor("name"), name="name")
示例#24
0
 # Item with two JSON-backed fields plus a nested ``Count`` item that is
 # instantiated without any extractor argument.
 class User(Item):
     uid = Field(JSONExtractor("id"))
     username = Field(JSONExtractor("username"))
     count = Count()
示例#25
0
        # Class body that first binds ``baz`` to a Field and then defines a
        # method with the same name, so the method is the value left in the
        # class namespace.
        class User(Item):
            baz = Field(JSONExtractor("baz"))

            def baz(self):
                pass
示例#26
0
def test_misplacing():
    """Passing an Item instance as a Field's ``extractor`` must raise ``ValueError``."""

    class ComplexExtractor(Item):
        pass

    with pytest.raises(ValueError):
        misplaced = ComplexExtractor(extractor=JSONExtractor("users[*]"))
        Field(extractor=misplaced)
示例#27
0
 # Minimal Item with a single field read through JSONExtractor("id").
 class User(Item):
     uid = Field(JSONExtractor("id"))
示例#28
0
 # Item declaring a field whose attribute name is ``name``.
 # NOTE(review): presumably used to trigger a field-name conflict with the
 # Item base class -- confirm against the surrounding test.
 class Parameter(Item):
     name = Field(
         XPathExtractor("./span[@class='name']"))  # noqa: B950, E701
示例#29
0
def test_lazy_str():
    """``str()`` of a ``LazyStr`` must reflect what ``func`` currently returns."""
    current = ""

    def read_current():
        # Closure over ``current``; it sees the rebinding done further down.
        return current

    lazy = LazyStr(func=read_current)
    assert str(lazy) == ""

    current = "abc"
    assert str(lazy) == "abc"


@pytest.fixture(
    params=[Field(), Item()],
    ids=repr,
)
def complex_extractor(request):
    """Provide, in turn, a bare ``Field()`` and a bare ``Item()``.

    Test ids are generated with ``repr`` of each instance.
    """
    return request.param


@pytest.fixture(
    params=[
        AttrCSSExtractor(expr="div.class", attr="id") if not _missing_cssselect
        else pytest.param("Missing 'cssselect'", marks=pytest.mark.skip()),
        CSSExtractor(expr="div.class") if not _missing_cssselect else
        pytest.param("Missing 'cssselect'", marks=pytest.mark.skip()),
        JSONPathExtractor(expr="boo") if not _missing_jsonpath else
        pytest.param("Missing 'jsonpath-extractor'", marks=pytest.mark.skip()),
        JSONPathRWExtractor(expr="boo") if not _missing_jsonpath_rw else
        pytest.param("Missing 'jsonpath-rw'", marks=pytest.mark.skip()),
        JSONPathRWExtExtractor(expr="boo") if not _missing_jsonpath_rw_ext else
示例#30
0
def test_type_creation():
    """Building an Item subclass through ``type()`` with one field must not raise."""
    namespace = {"bar": Field(JSONExtractor("bar"))}
    type("Foo", (Item, ), namespace)