示例#1
0
def test_item_with_no_data_should_be_excluded(shining):
    """Expect only ``title`` in the output; the ``foo`` rule adds no key."""
    extractor = Items([
        Rule(key="title", value=Path("//title/text()")),
        Rule(key="foo", value=Path("//foo/text()")),
    ])
    extracted = extractor(shining)
    expected = {"title": "The Shining"}
    assert extracted == expected
示例#2
0
def test_multivalued_subrules_should_generate_list_of_subitems(shining):
    """Expect nested ``Items`` with ``foreach`` to produce a list of dicts.

    The ``cast`` key is expected to hold one ``name``/``character`` mapping
    per cast entry.
    """
    member_rules = [
        Rule(key="name", value=Path("./td[1]/a/text()")),
        Rule(key="character", value=Path("./td[2]/text()")),
    ]
    cast_items = Items(foreach='//table[@class="cast"]/tr', rules=member_rules)
    extractor = Items([Rule(key="cast", value=cast_items)])

    extracted = extractor(shining)

    expected_cast = [
        {"name": "Jack Nicholson", "character": "Jack Torrance"},
        {"name": "Shelley Duvall", "character": "Wendy Torrance"},
    ]
    assert extracted == {"cast": expected_cast}
示例#3
0
def test_key_should_be_generatable_using_path(shining):
    """Expect a ``Path`` given as ``key`` to supply the key names."""
    info_rule = Rule(
        foreach='//div[@class="info"]',
        key=Path("./h3/text()"),
        value=Path("./p/text()"),
    )
    extracted = Items([info_rule])(shining)
    expected = {"Language:": "English", "Runtime:": "144 minutes"}
    assert extracted == expected
示例#4
0
def test_generated_key_none_should_be_excluded(shining):
    """Expect an empty result when the key path is ``./foo/text()``."""
    info_rule = Rule(
        foreach='//div[@class="info"]',
        key=Path("./foo/text()"),
        value=Path("./p/text()"),
    )
    extracted = Items([info_rule])(shining)
    assert extracted == {}
示例#5
0
def test_multiple_rules_should_generate_multiple_items(shining):
    """Expect two rules to yield two keys, with ``year`` converted by ``int``."""
    title_rule = Rule(key="title", value=Path("//title/text()"))
    year_path = Path('//span[@class="year"]/text()', transform=int)
    year_rule = Rule("year", value=year_path)

    extracted = Items([title_rule, year_rule])(shining)

    assert extracted == {"title": "The Shining", "year": 1980}
示例#6
0
def test_generated_key_should_be_transformable(shining):
    """Expect the ``transform`` of a key ``Path`` to be applied to each key."""

    def lower_without_last_char(text):
        # Lower-case the text and drop its final character.
        return text.lower()[:-1]

    key_path = Path("./h3/text()", transform=lower_without_last_char)
    info_rule = Rule(
        foreach='//div[@class="info"]',
        key=key_path,
        value=Path("./p/text()"),
    )

    extracted = Items([info_rule])(shining)

    assert extracted == {"language": "English", "runtime": "144 minutes"}
示例#7
0
def test_empty_values_should_be_excluded_from_multivalued_item_list(shining):
    """Expect no ``foos`` key in the result for the ``foos`` list path."""
    foos_path = Path(foreach='//ul[@class="foos"]/li', path="./text()")
    extractor = Items([Rule(key="foos", value=foos_path)])
    extracted = extractor(shining)
    assert extracted == {}
示例#8
0
def test_set_attr_name_from_path_empty_value_should_be_ignored(shining):
    """Expect no ``Horror`` attribute on any ``li`` after ``set_attr``.

    The attribute name is given as the path ``./@bar``.
    """
    attr_name = Path("./@bar")
    apply_set_attr = preprocessors.set_attr(
        path="//ul[@class='genres']/li",
        name=attr_name,
        value="bar",
    )
    apply_set_attr(shining)

    select_horror = xpather("//li[@Horror]/@Horror")
    matches = select_horror(shining)
    assert matches == []
示例#9
0
def test_multivalued_item_should_be_list(shining):
    """Expect a ``Path`` with ``foreach`` to produce a list value."""
    genres_path = Path(foreach='//ul[@class="genres"]/li', path="./text()")
    extractor = Items([Rule(key="genres", value=genres_path)])
    extracted = extractor(shining)
    assert extracted == {"genres": ["Horror", "Drama"]}
示例#10
0
def test_extracted_texts_should_be_concatenated_using_given_separator(shining):
    """Expect the selected texts to be joined with the ``sep`` string."""
    names_path = Path('//table[@class="cast"]/tr/td[1]/a/text()', sep=", ")
    extractor = Items([Rule(key="cast_names", value=names_path)])
    extracted = extractor(shining)
    assert extracted == {"cast_names": "Jack Nicholson, Shelley Duvall"}
示例#11
0
def test_extracted_text_should_be_transformable(shining):
    """Expect ``transform=int`` to turn the extracted year into an integer."""
    year_path = Path('//span[@class="year"]/text()', transform=int)
    extractor = Items([Rule(key="year", value=year_path)])
    extracted = extractor(shining)
    assert extracted == {"year": 1980}
示例#12
0
def test_set_attr_name_from_path_should_set_attribute_for_selected_elements(
    shining,
):
    """Expect ``set_attr`` to accept a ``Path`` as the attribute name.

    With the name taken from ``./text()``, a ``Horror`` attribute with the
    value ``"bar"`` is expected afterwards.
    """
    attr_name = Path("./text()")
    apply_set_attr = preprocessors.set_attr(
        path="//ul[@class='genres']/li",
        name=attr_name,
        value="bar",
    )
    apply_set_attr(shining)

    select_horror = xpather("//li[@Horror]/@Horror")
    matches = select_horror(shining)
    assert matches == ["bar"]
示例#13
0
def test_set_attr_value_from_path_should_set_attribute_for_selected_elements(
    shining,
):
    """Expect ``set_attr`` to accept a ``Path`` as the attribute value.

    With the value taken from ``./text()``, the ``foo`` attributes are
    expected to read ``Horror`` and ``Drama``.
    """
    attr_value = Path("./text()")
    apply_set_attr = preprocessors.set_attr(
        path="//ul[@class='genres']/li",
        name="foo",
        value=attr_value,
    )
    apply_set_attr(shining)

    select_foo = xpather("//li[@foo]/@foo")
    matches = select_foo(shining)
    assert matches == ["Horror", "Drama"]
示例#14
0
def test_section_no_roots_should_return_empty_result(shining):
    """Expect an empty result when the sub-items use ``section="//foo"``."""
    name_rule = Rule(key="name", value=Path("./text()"))
    director_items = Items(section="//foo", rules=[name_rule])
    extractor = Items([Rule(key="director", value=director_items)])

    extracted = extractor(shining)

    assert extracted == {}
示例#15
0
def test_subrules_should_generate_subitems(shining):
    """Expect nested ``Items`` used as a value to produce a nested mapping."""
    name_rule = Rule(
        key="name",
        value=Path('//div[@class="director"]//a/text()'),
    )
    link_rule = Rule(
        key="link",
        value=Path('//div[@class="director"]//a/@href'),
    )
    director_items = Items(rules=[name_rule, link_rule])
    extractor = Items([Rule(key="director", value=director_items)])

    extracted = extractor(shining)

    expected_director = {"name": "Stanley Kubrick", "link": "/people/1"}
    assert extracted == {"director": expected_director}
示例#16
0
def test_section_multiple_roots_should_raise_error(shining):
    """Expect ``ValueError`` when the sub-items use ``section="//div"``."""
    # Both construction and extraction stay inside the context manager so the
    # guarded region is the same as before.
    with pytest.raises(ValueError):
        name_rule = Rule(key="name", value=Path("./text()"))
        director_items = Items(section="//div", rules=[name_rule])
        extractor = Items([Rule(key="director", value=director_items)])
        extractor(shining)
示例#17
0
def test_transformers_should_be_chainable(shining):
    """Expect ``chain`` to combine ``int`` with a follow-up computation."""

    def year_to_century(year):
        return year // 100 + 1

    century_path = Path(
        '//span[@class="year"]/text()',
        transform=chain(int, year_to_century),
    )
    extractor = Items([Rule(key="century", value=century_path)])

    extracted = extractor(shining)

    assert extracted == {"century": 20}
示例#18
0
def test_section_should_set_root_for_queries(shining):
    """Expect relative sub-rule paths to work under a ``section``.

    The sub-rules use ``./text()`` and ``./@href`` with the section
    ``//div[@class="director"]//a``.
    """
    director_rules = [
        Rule(key="name", value=Path("./text()")),
        Rule(key="link", value=Path("./@href")),
    ]
    director_items = Items(
        section='//div[@class="director"]//a',
        rules=director_rules,
    )
    extractor = Items([Rule(key="director", value=director_items)])

    extracted = extractor(shining)

    expected_director = {"name": "Stanley Kubrick", "link": "/people/1"}
    assert extracted == {"director": expected_director}
示例#19
0
def test_multivalued_items_should_be_transformable(shining):
    """Expect ``transform=str.lower`` to lower-case each listed genre."""
    genres_path = Path(
        foreach='//ul[@class="genres"]/li',
        path="./text()",
        transform=str.lower,
    )
    extractor = Items([Rule(key="genres", value=genres_path)])

    extracted = extractor(shining)

    assert extracted == {"genres": ["horror", "drama"]}
示例#20
0
def test_subitems_should_be_transformable(shining):
    """Expect ``transform`` on nested ``Items`` to reshape each sub-item.

    Each cast entry is expected to be formatted into a single string.
    """

    def describe_role(member):
        return "%(name)s as %(character)s" % member

    member_rules = [
        Rule(key="name", value=Path("./td[1]/a/text()")),
        Rule(key="character", value=Path("./td[2]/text()")),
    ]
    cast_items = Items(
        foreach='//table[@class="cast"]/tr',
        rules=member_rules,
        transform=describe_role,
    )
    extractor = Items([Rule(key="cast", value=cast_items)])

    extracted = extractor(shining)

    expected_cast = [
        "Jack Nicholson as Jack Torrance",
        "Shelley Duvall as Wendy Torrance",
    ]
    assert extracted == {"cast": expected_cast}
示例#21
0
def test_item_with_false_value_should_be_included():
    """Expect a value that ``bool`` turns into ``False`` to stay in the result."""
    markup = '<root><foo val=""/></root>'
    flag_path = Path("//foo/@val", transform=bool)
    extractor = Items([Rule(key="foo", value=flag_path)])
    tree = build_tree(markup)
    extracted = extractor(tree)
    assert extracted == {"foo": False}
示例#22
0
def test_item_with_empty_str_value_should_be_included():
    """Expect an empty attribute value to appear as ``""`` in the result."""
    markup = '<root><foo val=""/></root>'
    extractor = Items([Rule(key="foo", value=Path("//foo/@val"))])
    tree = build_tree(markup)
    extracted = extractor(tree)
    assert extracted == {"foo": ""}
示例#23
0
def test_set_text_value_from_path_should_set_text_for_selected_elements(
    shining,
):
    """Expect ``set_text`` to accept a ``Path`` as the new text.

    With ``transformers.lower`` as the transform, the genre texts are
    expected to read ``horror`` and ``drama`` afterwards.
    """
    genres_xpath = "//ul[@class='genres']/li"
    new_text = Path("./text()", transform=transformers.lower)
    apply_set_text = preprocessors.set_text(path=genres_xpath, text=new_text)
    apply_set_text(shining)

    select_texts = xpather("//ul[@class='genres']/li/text()")
    texts = select_texts(shining)
    assert texts == ["horror", "drama"]
示例#24
0
def test_set_text_empty_value_should_be_ignored(shining):
    """Expect no genre text nodes after ``set_text`` with the path ``./@foo``."""
    genres_xpath = "//ul[@class='genres']/li"
    new_text = Path("./@foo")
    apply_set_text = preprocessors.set_text(path=genres_xpath, text=new_text)
    apply_set_text(shining)

    select_texts = xpather("//ul[@class='genres']/li/text()")
    texts = select_texts(shining)
    assert texts == []
示例#25
0
def test_extracted_texts_should_be_concatenated(shining):
    """Expect the texts selected by ``//h1//text()`` to form one string."""
    title_rule = Rule(key="full_title", value=Path("//h1//text()"))
    extractor = Items([title_rule])
    extracted = extractor(shining)
    assert extracted == {"full_title": "The Shining (1980)"}
示例#26
0
def test_extracted_text_should_be_scalar(shining):
    """Expect a plain ``Path`` value to be a single string, not a list."""
    title_rule = Rule(key="title", value=Path("//title/text()"))
    extractor = Items([title_rule])
    extracted = extractor(shining)
    assert extracted == {"title": "The Shining"}