def test_country_scrapers_returns_only_counties_default():
    """The unfiltered scraper list names 'Bra' and 'Pak' but not the
    'CountryScraper' or 'Runner' classes.
    """
    scraper_names = {cls.__name__ for cls in Runner().country_scrapers()}
    # Names that must be present in the default listing
    for expected in ('Bra', 'Pak'):
        assert expected in scraper_names
    # Names that must never show up in the listing
    for excluded in ('CountryScraper', 'Runner'):
        assert excluded not in scraper_names
Пример #2
0
def process_data(country, cache_dir):
    """Instantiate a runner and scrape the data for a single country.

    Args:
        country: country identifier; passed to the runner as the only
            entry of its ``filter`` list.
        cache_dir: directory passed to the runner as ``cache_dir``.
    """
    # No alert manager (e.g., could be slack)
    runner = Runner(alert_manager=None)

    # Make sure geckodriver is on the path by putting the current working
    # directory first. os.pathsep keeps this portable, a missing PATH is
    # treated as empty, and the prepend is skipped when the working
    # directory already leads the list so repeated calls do not keep
    # growing PATH.
    cwd = os.getcwd()
    current = os.environ.get("PATH", "")
    entries = current.split(os.pathsep) if current else []
    if not entries or entries[0] != cwd:
        os.environ["PATH"] = os.pathsep.join([cwd] + entries)

    # This would be equivalent to (e.g., for Brazil):
    # covid-world-scraper --cache-dir=$PWD/covid-cache bra
    print(f"Processing {country}")
    runner.run(cache_dir=cache_dir, headless_status=True, filter=[country])
def test_run_is_called_on_country_scrapers():
    """Every scraper returned by Runner.run() has had run() called once."""
    # Stand-ins for the scraper classes; patching Runner.country_scrapers
    # restricts the run to this limited set of countries.
    fake_classes = [Mock(name=label) for label in ('Bra', 'Pak')]
    target = 'covid_world_scraper.runner.Runner.country_scrapers'
    with patch(target, return_value=fake_classes):
        ran = Runner().run()
        for instance in ran:
            instance.run.assert_called_once()
Пример #4
0
def cli(countries, all, alert, cache_dir, list_scrapers, log_file, headless):
    """Scrape data for one or more countries."""
    # NOTE(review): the docstring is kept to one line because it is
    # presumably surfaced by click as the command's help text -- confirm.
    #
    # Parameters, as used below:
    #   countries     - forwarded to Runner.run() as its ``filter`` argument.
    #   all           - accepted but never read in this function (the name
    #                   shadows the builtin; it is part of the signature).
    #   alert         - when truthy, try to configure Slack alerts from the
    #                   COVID_WORLD_SLACK_API_KEY / COVID_WORLD_SLACK_CHANNEL
    #                   environment variables.
    #   cache_dir     - created if missing, then forwarded to Runner.run().
    #   list_scrapers - when truthy, print the available scrapers instead of
    #                   running them.
    #   log_file      - file that log records are appended to.
    #   headless      - forwarded to Runner.run() as ``headless_status``.

    # Ensure cache directory exists
    Path(cache_dir).mkdir(parents=True, exist_ok=True)

    # Log to the file (append mode) and mirror INFO+ records to the console.
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(name)-12s - %(message)s',
                        datefmt='%m-%d %H:%M',
                        filename=log_file,
                        filemode='a')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(name)-12s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)
    logger = logging.getLogger(__name__)

    # Set up alert manager
    alert_manager = None
    if alert:
        try:
            api_key = os.environ['COVID_WORLD_SLACK_API_KEY']
            channel = os.environ['COVID_WORLD_SLACK_CHANNEL']
            alert_msg = "Slack alerts will be sent to #{}.".format(channel)
            alert_manager = SlackAlertManager(api_key, channel)
        except KeyError:
            # A missing environment variable leaves alert_manager as None,
            # so the run proceeds without Slack alerts.
            alert_msg = "WARNING - Slack alerts will not be sent.\n" + \
                "Please ensure you've configured the below environment variables:\n" + \
                "COVID_WORLD_SLACK_API_KEY=YOUR_API_KEY\n" + \
                "COVID_WORLD_SLACK_CHANNEL=channel-name\n\n"
        finally:
            # Report the outcome of the alert setup either way.
            logger.warning(alert_msg)

    runner = Runner(alert_manager=alert_manager)

    if list_scrapers:
        click.echo('Available country scrapers:')
        for country in runner.list_countries():
            msg = '- {}'.format(country)
            click.echo(msg)
    else:
        kwargs = {
            'cache_dir': cache_dir,
            'headless_status': headless,
            'filter': countries,
        }
        try:
            runner.run(**kwargs)
            if alert and alert_manager:
                runner.send_alerts()
        except Exception as e:
            # Top-level boundary: log the failure instead of propagating it.
            # format_exception (unlike format_tb) also renders the exception
            # type and message, so the log states what actually went wrong
            # and not only where.
            traceback_str = ''.join(
                traceback.format_exception(type(e), e, e.__traceback__)
            )
            logger.error(
                "ERROR: A fatal error occurred while running scrapers or sending alerts!!!"
            )
            logger.error(traceback_str)
def test_send_alerts():
    """run() must not post to Slack by itself; send_alerts() posts one
    success message and one error message.
    """
    # Two fake scraper classes: the first carries a country code, the
    # second raises a generic error when it is called.
    fake_classes = [
        Mock(name='Bra', country_code='BRA'),
        Mock(name='Pak', side_effect=Exception('Woe is me')),
    ]
    scrapers_target = 'covid_world_scraper.runner.Runner.country_scrapers'
    post_target = 'covid_world_scraper.alerts.WebClient.chat_postMessage'
    with patch(scrapers_target) as fake_scrapers:
        fake_scrapers.return_value = fake_classes
        with patch(post_target) as fake_post:
            # Runner configured with an alert manager instance
            runner = Runner(
                alert_manager=SlackAlertManager('APIKEY', 'some-channel')
            )
            # Running only generates the messages ...
            runner.run()
            fake_post.assert_not_called()
            # ... the Slack client is hit once alerts are requested.
            runner.send_alerts()
            fake_post.assert_called()
            assert fake_post.call_count == 2
            first_call, second_call = fake_post.call_args_list
            # Index 1 of a recorded call holds its keyword arguments.
            assert '1 scraper(s) ran successfully' in first_call[1]['text']
            assert 'Woe is me' in second_call[1]['text']
Пример #6
0
def list_countries():
    """Return the leading token of every country the runner lists.

    Each entry from ``Runner.list_countries()`` is cut at its first space
    and only the part before it is kept. The resulting list is intended to
    be mapped over the process data function (in parallel if possible).
    """
    listing = Runner(alert_manager=None).list_countries()
    codes = []
    for entry in listing:
        head, _, _ = entry.partition(' ')
        codes.append(head)
    return codes
Пример #7
0
#!/usr/bin/env python3

# This is a test for running a data scrape, separate from a workflow. We want
# to make sure this works in its simplest form before adding it to a workflow.
# We are using an application to process covid data via:
# https://github.com/biglocalnews/covid-world-scraper

from covid_world_scraper import Runner
import os

# No alert manager (e.g., could be slack)
runner = Runner(alert_manager=None)

# runner.list_countries()
# ['BRA (Brazil)',
# 'DEU (Germany)',
# 'IND (India)',
# 'KOR (South Korea)',
# 'NGA (Nigeria)',
# 'PAK (Pakistan)',
# 'ZAF (South Africa)']

# Use a custom cache directory under the current working directory.
# makedirs(exist_ok=True) creates it only when missing, without the
# check-then-create race of a separate os.path.exists() test.
cache_dir = os.path.join(os.getcwd(), 'covid-cache')
os.makedirs(cache_dir, exist_ok=True)

# Testing a single run (headless, Brazil only)!
# Put the working directory first on PATH, using the platform's separator
# and tolerating an unset or empty PATH.
os.environ["PATH"] = os.pathsep.join(
    part for part in (os.getcwd(), os.environ.get("PATH")) if part
)
runner.run(cache_dir=cache_dir, headless_status=True, filter=["BRA"])
def test_list_countries():
    """The runner's country listing contains the 'PAK (Pakistan)' entry."""
    listing = Runner().list_countries()
    expected_entry = 'PAK (Pakistan)'
    assert expected_entry in listing
def test_country_scrapers_with_incorrect_country_name():
    """Filtering scrapers by an unknown name raises CountryScraperError."""
    # Build the runner outside the raises block so that only the filtered
    # lookup itself can satisfy the expected exception.
    r = Runner()
    with pytest.raises(CountryScraperError):
        # list() forces evaluation in case country_scrapers() is lazy.
        list(r.country_scrapers(filter=['Foo']))
def test_country_scrapers_filter():
    """Filtering on 'Bra' yields the Bra scraper and leaves Pak out."""
    selected = Runner().country_scrapers(filter=['Bra'])
    found = {cls.__name__ for cls in selected}
    assert 'Bra' in found
    assert 'Pak' not in found
def test_country_codes():
    """Runner.country_codes has keys for both 'BRA' and 'PAK'."""
    known = set(Runner().country_codes.keys())
    # Collect any expected code that is absent; none may be missing.
    missing = [code for code in ('BRA', 'PAK') if code not in known]
    assert not missing