def nyt():
    """Yield an ``NYTimes`` object backed by the 2020-03-10 snapshot page.

    Presumably registered as a pytest fixture (the decorator is not visible
    in this part of the file). After the consumer is done, the backing file
    is deleted when ``CLEAN_UP`` is truthy and the file exists.
    """
    snapshot_url = (
        "https://bites-data.s3.us-east-2.amazonaws.com/"
        "2020-03-10_nytimes.html"
    )
    page = Web(snapshot_url, File("nytimes.html"))
    yield NYTimes(page)

    # Teardown: nothing to do unless clean-up was requested.
    if not CLEAN_UP:
        return
    cached = page.file.path
    if cached.exists():
        cached.unlink()
def rcp():
    """Yield a ``RealClearPolitics`` object backed by the 2020-03-10 snapshot.

    Presumably registered as a pytest fixture (the decorator is not visible
    in this part of the file). After the consumer is done, the backing file
    is deleted when ``CLEAN_UP`` is truthy and the file exists.
    """
    snapshot_url = (
        "https://bites-data.s3.us-east-2.amazonaws.com/"
        "2020-03-10_realclearpolitics.html"
    )
    page = Web(snapshot_url, File("realclearpolitics.html"))
    yield RealClearPolitics(page)

    # Teardown: nothing to do unless clean-up was requested.
    if not CLEAN_UP:
        return
    cached = page.file.path
    if cached.exists():
        cached.unlink()
def test_web_bad_url():
    """Accessing ``Web.data`` for an unresolvable host must raise ``URLError``.

    The resolver's wording differs per platform, so the test accepts the
    Linux, macOS and Windows variants of the name-resolution failure.
    """
    file = File("clamytoe.html")
    url = "https://clamytoe.dev"
    test_web = Web(url, file)

    with pytest.raises(URLError) as e:
        test_web.data

    # Inspect the exception only after the ``with`` block has exited:
    # statements placed after the raising line inside the block never run.
    error = str(e.value)
    assert 'urlopen error' in error

    resolver_messages = (
        'Name or service not known',  # Linux (glibc)
        'nodename nor servname provided, or not known',  # macOS / BSD
        'getaddrinfo failed',  # Windows
    )
    assert any(message in error for message in resolver_messages)
def test_file_class():
    """A fresh ``File`` lives at ``TMP/<TODAY>_<name>`` and has no data yet.

    The rendered path is compared against both separator styles so the
    check passes on POSIX as well as on Windows.
    """
    file = File("empty.html")
    basename = f"{TODAY}_{file.name}"

    # str(Path) uses the platform's native separator, so accept either form
    # instead of hard-coding the Windows backslash.
    assert str(file.path) in (f"{TMP}/{basename}", f"{TMP}\\{basename}")
    assert file.data is None
def test_file():
    """Yield a ``File`` named ``test.html`` and optionally remove it afterwards.

    Presumably registered as a pytest fixture (the decorator is not visible
    in this part of the file). The backing file is deleted after the yield
    when ``CLEAN_UP`` is truthy and the file exists.
    """
    file = File("test.html")
    yield file

    # Teardown: nothing to do unless clean-up was requested.
    if not CLEAN_UP:
        return
    if file.path.exists():
        file.path.unlink()
def test_file_class():
    """A fresh ``File`` lives at ``TMP/<TODAY>_<name>`` and has no data yet.

    Both the POSIX ("/") and the DOS/Windows ("\\") renderings of the path
    are accepted so the test is portable across platforms.
    """
    file = File("empty.html")
    rendered = str(file.path)
    accepted = (
        f"{TMP}/{TODAY}_{file.name}",
        f"{TMP}\\{TODAY}_{file.name}",
    )
    assert rendered in accepted
    assert file.data is None