示例#1
0
文件: apd.py 项目: mscarey/scrapd
async def fetch_and_parse(session, url):
    """
    Download a fatality detail page and turn it into a list of records.

    Every record produced by the parser is tagged with the source link and
    an ID built from its case number and its position on the page.

    :param aiohttp.ClientSession session: aiohttp session
    :param str url: detail page URL
    :return: the records extracted from the page, one per deceased person.
    :rtype: list
    :raises ValueError: if the page is empty or no record could be extracted.
    """
    # Download the detail page; an empty body cannot be parsed.
    content = await fetch_detail_page(session, url)
    if not content:
        raise ValueError(f'The URL {url} returned a 0-length content.')

    # Parse the page and decorate each record as it is produced.
    records = []
    for position, person in enumerate(parsing.parse_page(content, url)):
        # Remember where the record came from.
        person[Fields.LINK] = url
        # Case number plus position keeps IDs distinct within one page.
        person[Fields.ID] = f"{person[Fields.CASE]}-{position}"
        records.append(person)

    if records:
        return records

    raise ValueError(f'No data could be extracted from the page {url}.')
示例#2
0
def test_parse_page_00(filename, expected):
    """Check that the first record parsed from a stored page matches `expected`.

    The 'Notes' entry is removed from the parsed record before the comparison
    whenever the expected data does not define one.
    """
    html = (TEST_DATA_DIR / filename).read_text()
    records = parsing.parse_page(html, fake.uri())
    first_record = next(records)

    # Only compare notes when the scenario actually specifies them.
    if 'Notes' not in expected and 'Notes' in first_record:
        first_record.pop('Notes')

    assert first_record == expected
示例#3
0
def test_no_DOB_field_when_DOB_not_provided():
    """
    Check that a description such as "Hispanic male, 19 years of age"
    (an age but no birth date) leaves the DOB field unset or empty.
    """
    html = (TEST_DATA_DIR / 'traffic-fatality-20-4').read_text()
    records = parsing.parse_page(html, 'fake_url')
    first_record = next(records)
    dob = first_record.get(Fields.DOB)
    assert not dob
示例#4
0
def test_parse_page_01(mocker, filename, expected):
    """Ensure `parse_page` calls `parse_page_content` exactly once.

    `parse_page_content` is patched to return empty data together with one
    parsing error, so only the delegation itself is checked here.

    :param mocker: pytest-mock fixture used to patch `parse_page_content`.
    :param filename: name of the stored page under `TEST_DATA_DIR`.
    :param expected: unused; kept so the test signature matches its scenarios.
    """
    data = {}
    parsing_errors = ['one error']
    page_fd = TEST_DATA_DIR / filename
    page = page_fd.read_text()
    pc = mocker.patch('scrapd.core.parsing.parse_page_content', return_value=(data, parsing_errors))

    # The sibling tests consume `parse_page` with `next()`, i.e. it is lazy:
    # merely calling it would not execute its body. Advance it one step so the
    # patched function is reached; the `None` default tolerates an empty result.
    # NOTE(review): assumes `parse_page_content` is called before the first
    # record is yielded - confirm against `parse_page`.
    next(iter(parsing.parse_page(page, fake.uri())), None)

    # `assert pc.called_once` was always true: `called_once` is not a Mock API,
    # so the attribute was auto-created as a truthy child mock.
    pc.assert_called_once()
示例#5
0
def test_multiple_deceased(filename, expected):
    """
    Check that the second record produced by parsing.parse_page carries
    the expected values for every key listed in `expected`.
    """
    records = parsing.parse_page(load_test_page(filename), 'fake_url')

    # Discard the first record; only the second one is under test.
    next(records)
    second_record = next(records)

    for field in expected:
        assert second_record[field] == expected[field]
示例#6
0
def test_parse_page_get_location(filename, expected):
    """Check that the first parsed record holds the expected 'Location' value."""
    html = (TEST_DATA_DIR / filename).read_text()
    records = parsing.parse_page(html, fake.uri())
    first_record = next(records)
    assert first_record['Location'] == expected
示例#7
0
def test_parse_page_with_missing_data():
    """Check that a page holding only a case number produces no records."""
    parsed = parsing.parse_page("Case:    19-1234567", fake.uri())

    # Asking for the first record must exhaust the iterator immediately.
    with pytest.raises(StopIteration):
        next(parsed)