Exemplo n.º 1
0
def test_robotstxt_to_df_saves_single_file():
    """Check that ``robotstxt_to_df`` writes its result to ``output_file``.

    The written file is read back as JSON lines and must load into a
    DataFrame that has the 'directive', 'content' and 'download_date'
    columns.
    """
    output_path = 'robots_output.jl'
    try:
        robotstxt_to_df('https://www.media-supermarket.com/robots.txt',
                        output_file=output_path)
        result = pd.read_json(output_path, lines=True)
        assert isinstance(result, pd.core.frame.DataFrame)
        assert all(col in result
                   for col in ['directive', 'content', 'download_date'])
    finally:
        # Clean up even when an assertion fails, so a stale file never leaks
        # into later test runs. The existence check keeps a failure that
        # happened before the file was written from being masked by
        # FileNotFoundError.
        if os.path.exists(output_path):
            os.remove(output_path)
Exemplo n.º 2
0
def test_robotstxt_to_df():
    """Check that ``robotstxt_to_df`` returns a DataFrame with the
    'directive', 'content' and 'download_date' columns."""
    robots_url = 'https://www.media-supermarket.com/robots.txt'
    expected_columns = {'directive', 'content', 'download_date'}

    result = robotstxt_to_df(robots_url)

    assert isinstance(result, pd.core.frame.DataFrame)
    assert expected_columns.issubset(result.columns)
Exemplo n.º 3
0
def test_robotstxt_to_df_contains_errors():
    """Check that the result for an invalid URL contains an 'errors' entry."""
    invalid_url = 'wrong_url.html'
    robots_result = robotstxt_to_df(invalid_url)
    assert 'errors' in robots_result
Exemplo n.º 4
0
def test_robotstxt_to_df_raises_on_wrong_file():
    """Check that a '.pdf' ``output_file`` makes ``robotstxt_to_df`` raise
    ``ValueError``."""
    # NOTE(review): `robots_file` is defined outside this test; presumably a
    # module-level robots.txt location -- confirm against the full file.
    bad_output_path = 'wrong_extension.pdf'
    with pytest.raises(ValueError):
        robotstxt_to_df(robots_file, output_file=bad_output_path)