示例#1
0
def test_set_attr_value_from_path_should_set_attribute_for_selected_elements(
        shining_content):
    """Check ``set_attr`` when the attribute value is given as a path query.

    The preprocessor targets the ``li`` elements under the ``genres`` list
    and names a ``foo`` attribute whose value spec is ``./text()``. The item
    rule then reads ``@foo`` from every ``li`` carrying that attribute, and
    the result is expected to be the list of genre names.
    """
    set_foo_from_text = {
        "op": "set_attr",
        "path": '//ul[@class="genres"]/li',
        "name": "foo",
        "value": {"path": "./text()"},
    }
    genres_rule = {
        "key": "genres",
        "value": {"foreach": "//li[@foo]", "path": "./@foo"},
    }
    spec = {"pre": [set_foo_from_text], "items": [genres_rule]}

    expected = {"genres": ["Horror", "Drama"]}
    assert scrape(shining_content, spec) == expected
示例#2
0
def test_set_attr_value_from_path_empty_value_should_be_ignored(
        shining_content):
    """Check ``set_attr`` when the value path yields nothing.

    The value spec here is ``./@bar``; per the test name this is expected to
    produce an empty value (the fixture document is not visible from this
    test, so that is an assumption). No ``li`` should then carry ``foo``,
    and the scrape result is expected to be an empty mapping.
    """
    preprocessors = [
        {
            "op": "set_attr",
            "path": '//ul[@class="genres"]/li',
            "name": "foo",
            "value": {"path": "./@bar"},
        },
    ]
    rules = [
        {
            "key": "genres",
            "value": {"foreach": "//li[@foo]", "path": "./@foo"},
        },
    ]

    result = scrape(shining_content, {"pre": preprocessors, "items": rules})
    assert result == {}
示例#3
0
def test_remove_selected_none_should_not_cause_error(shining_content):
    """Check that a ``remove`` preprocessor with no matches is harmless.

    ``//tr[50]`` presumably selects no row in the fixture (assumption based
    on the test name); the cast names must still be extracted as usual.
    """
    name_rule = {"key": "name", "value": {"path": "./td[1]/a/text()"}}
    cast_rule = {
        "key": "cast",
        "value": {
            "foreach": '//table[@class="cast"]/tr',
            "items": [name_rule],
        },
    }
    spec = {
        "pre": [{"op": "remove", "path": "//tr[50]"}],
        "items": [cast_rule],
    }

    actor_names = ("Jack Nicholson", "Shelley Duvall")
    expected = {"cast": [{"name": actor} for actor in actor_names]}
    assert scrape(shining_content, spec) == expected
示例#4
0
def test_multivalued_subrules_should_generate_list_of_subitems(
        shining_content):
    """Check that ``foreach`` combined with sub-rules yields a list of dicts.

    Each row of the cast table is expected to produce one mapping holding
    both the ``name`` and the ``character`` extracted from that row.
    """
    name_rule = {"key": "name", "value": {"path": "./td[1]/a/text()"}}
    character_rule = {"key": "character", "value": {"path": "./td[2]/text()"}}
    cast_rule = {
        "key": "cast",
        "value": {
            "foreach": '//table[@class="cast"]/tr',
            "items": [name_rule, character_rule],
        },
    }

    # (actor, role) pairs in the order the rows are expected to come back.
    credits = (
        ("Jack Nicholson", "Jack Torrance"),
        ("Shelley Duvall", "Wendy Torrance"),
    )
    expected = {
        "cast": [
            {"character": role, "name": actor} for actor, role in credits
        ]
    }

    assert scrape(shining_content, {"items": [cast_rule]}) == expected
示例#5
0
def test_shorthand_notation_should_be_path_and_transform(shining_content):
    """Check the string shorthand for a rule value.

    The value is given as a single ``"<path> | <transform>"`` string instead
    of a mapping; the result is expected to be the integer ``1980``.
    """
    year_rule = {
        "key": "year",
        "value": '//span[@class="year"]/text() | int',
    }
    result = scrape(shining_content, {"items": [year_rule]})
    assert result == {"year": 1980}
示例#6
0
def test_empty_rules_should_return_empty_result(shining_content):
    """Check that a spec with an empty ``items`` list yields an empty dict."""
    spec = {"items": []}
    assert scrape(shining_content, spec) == {}
示例#7
0
def test_unknown_preprocessor_should_raise_error(shining_content):
    """Check that an unrecognised preprocessor ``op`` raises ``ValueError``.

    The spec is built outside the ``pytest.raises`` block so that only the
    ``scrape`` call itself can satisfy the expected exception.
    """
    pre = [{"op": "foo", "path": "//tr[1]"}]
    with pytest.raises(ValueError):
        scrape(shining_content, {"pre": pre})
示例#8
0
def test_extracted_texts_should_be_concatenated(shining_content):
    """Check that multiple text matches are joined into one string.

    ``//h1//text()`` is expected to yield the single value
    ``"The Shining (1980)"`` rather than separate pieces.
    """
    title_rule = {"key": "full_title", "value": {"path": "//h1//text()"}}
    result = scrape(shining_content, {"items": [title_rule]})
    assert result == {"full_title": "The Shining (1980)"}
示例#9
0
def test_extracted_text_should_be_scalar(shining_content):
    """Check that a plain ``path`` rule yields a string, not a list."""
    title_rule = {"key": "title", "value": {"path": "//title/text()"}}
    spec = {"items": [title_rule]}
    expected = {"title": "The Shining"}
    assert scrape(shining_content, spec) == expected