def main(excel_path, gsheet_auth, updatesheets, updatetabs, scrapers, basic_auths,
         other_auths, nojson, countries_override, save, use_saved, **ignore):
    logger.info('##### hdx-scraper-covid-viz version %.1f ####' % VERSION)
    configuration = Configuration.read()
    with temp_dir() as temp_folder:
        with Download(rate_limit={'calls': 1, 'period': 0.1}) as downloader:
            retriever = Retrieve(downloader, temp_folder, 'saved_data', temp_folder,
                                 save, use_saved)
            if scrapers:
                logger.info('Updating only scrapers: %s' % scrapers)
            tabs = configuration['tabs']
            if updatetabs is None:
                updatetabs = list(tabs.keys())
                logger.info('Updating all tabs')
            else:
                logger.info('Updating only these tabs: %s' % updatetabs)
            noout = NoOutput(updatetabs)
            if excel_path:
                excelout = ExcelOutput(excel_path, tabs, updatetabs)
            else:
                excelout = noout
            if gsheet_auth:
                gsheets = GoogleSheets(configuration, gsheet_auth, updatesheets,
                                       tabs, updatetabs)
            else:
                gsheets = noout
            if nojson:
                jsonout = noout
            else:
                jsonout = JsonOutput(configuration, updatetabs)
            outputs = {'gsheets': gsheets, 'excel': excelout, 'json': jsonout}
            today = datetime.now()
            countries_to_save = get_indicators(configuration, today, retriever,
                                               outputs, updatetabs, scrapers,
                                               basic_auths, other_auths,
                                               countries_override)
            jsonout.add_additional_json(downloader, today=today)
            jsonout.save(countries_to_save=countries_to_save)
            excelout.save()
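# A minimal sketch of how a main() like the one above is typically invoked in
# these HDX scrapers, assuming hdx-python-api's keyword-arguments facade. The
# argparse flags and the kwarg values below are illustrative, not taken from
# this repository; main()'s **ignore swallows any extra kwargs the facade
# passes through (such as user_agent).
import argparse

from hdx.facade.keyword_arguments import facade


def parse_args():
    parser = argparse.ArgumentParser(description='COVID viz scraper')
    parser.add_argument('-xl', '--excel_path', default=None, help='Path to Excel output')
    parser.add_argument('-gs', '--gsheet_auth', default=None, help='Google Sheets credentials')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # facade() sets up HDX configuration and logging, then calls main() with
    # these keyword arguments.
    facade(main, user_agent='hdx-scraper-covid-viz',
           excel_path=args.excel_path, gsheet_auth=args.gsheet_auth,
           updatesheets=None, updatetabs=None, scrapers=None, basic_auths=dict(),
           other_auths=dict(), nojson=False, countries_override=None,
           save=False, use_saved=False)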
def test_run(self, configuration, fixtures_dir):
    with temp_dir("TestIATIViz", delete_on_success=True, delete_on_failure=False) as tempdir:
        with Download(user_agent="test") as downloader:
            retriever = Retrieve(
                downloader,
                tempdir,
                fixtures_dir,
                tempdir,
                save=False,
                use_saved=True,
            )
            today = "2021-05-06"
            start(
                configuration,
                today,
                retriever,
                tempdir,
                dportal_params=None,
                whattorun="covid",
                filterdate="2020-01",
            )
            for filename in ("flows", "transactions", "reporting_orgs"):
                csv_filename = f"{filename}.csv"
                expected_file = join(fixtures_dir, csv_filename)
                actual_file = join(tempdir, csv_filename)
                assert_files_same(expected_file, actual_file)
                json_filename = f"{filename}.json"
                expected_file = join(fixtures_dir, json_filename)
                actual_file = join(tempdir, json_filename)
                assert filecmp.cmp(expected_file, actual_file)
def main(save, use_saved, **ignore):
    """Generate dataset and create it in HDX"""
    with Download(extra_params_yaml=join(expanduser('~'), '.extraparams.yml'),
                  extra_params_lookup=lookup) as token_downloader:
        configuration = Configuration.read()
        with Download() as downloader:
            folder = temp_dir(lookup)
            retriever = Retrieve(downloader, folder, 'saved_data', folder, save, use_saved)
            wfp = WFPFood(configuration, token_downloader, retriever)
            countries = wfp.get_countries()
            logger.info('Number of country datasets to upload: %d' % len(countries))
            wfp.build_mappings()
            for info, country in progress_storing_tempdir(lookup, countries, 'iso3'):
                dataset, showcase, qc_indicators = wfp.generate_dataset_and_showcase(
                    country['iso3'], info['folder'])
                if dataset:
                    dataset.update_from_yaml()
                    # % is left-associative: the notes template from the YAML is
                    # filled with the sentence first; the %s inside that sentence
                    # is then replaced by the country name.
                    dataset['notes'] = dataset['notes'] % 'Food Prices data for %s. Food prices data comes from the World Food Programme and covers' % country['name']
                    dataset.generate_resource_view(-1, indicators=qc_indicators)
                    dataset.create_in_hdx(
                        remove_additional_resources=True,
                        hxl_update=False,
                        updated_by_script='HDX Scraper: WFP Food Prices',
                        batch=info['batch'])
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
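# A tiny illustration of the left-associative % chain used above for
# dataset['notes']. The template string here is made up for the example; in the
# scraper it comes from the dataset YAML.
template = 'Description: %s more text.'  # stands in for dataset['notes']
sentence = 'Food Prices data for %s. Food prices data comes from the World Food Programme and covers'
result = template % sentence % 'Congo'   # == (template % sentence) % 'Congo'
assert result == ('Description: Food Prices data for Congo. Food prices data comes '
                  'from the World Food Programme and covers more text.')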
def main(
    output_dir,
    saved_dir,
    save,
    use_saved,
    dportal_params,
    whattorun,
    filterdate,
    **ignore,
):
    logger.info(f"##### hdx-scraper-iati-viz version {VERSION:.1f} ####")
    configuration = Configuration.read()
    output_dir = f"{output_dir}_{whattorun}"
    rmtree(output_dir, ignore_errors=True)
    mkdir(output_dir)
    with Download() as downloader:
        retriever = Retrieve(
            downloader,
            configuration["fallback_dir"],
            f"{saved_dir}_{whattorun}",
            output_dir,
            save,
            use_saved,
        )
        today = datetime.utcnow().isoformat()
        start(
            configuration,
            today,
            retriever,
            output_dir,
            dportal_params,
            whattorun,
            filterdate,
        )
def test_get_indicators(self, configuration, folder):
    with temp_dir('TestCovidViz', delete_on_success=True, delete_on_failure=False) as tempdir:
        with Download(user_agent='test') as downloader:
            retriever = Retrieve(downloader, tempdir, folder, tempdir,
                                 save=False, use_saved=True)
            tabs = configuration['tabs']
            noout = NoOutput(tabs)
            jsonout = JsonOutput(configuration, tabs)
            outputs = {'gsheets': noout, 'excel': noout, 'json': jsonout}
            today = parse_date('2021-05-03')
            countries_to_save = get_indicators(
                configuration, today, retriever, outputs, tabs,
                scrapers=[
                    'ifi', 'who_global', 'who_national', 'who_subnational',
                    'who_covid', 'sadd', 'covidtests', 'cadre_harmonise',
                    'access', 'food_prices'
                ],
                use_live=False)
            filepaths = jsonout.save(tempdir, countries_to_save=countries_to_save)
            assert filecmp.cmp(filepaths[0], join(folder, 'test_scraper_all.json'))
            assert filecmp.cmp(filepaths[1], join(folder, 'test_scraper.json'))
            assert filecmp.cmp(filepaths[2], join(folder, 'test_scraper_daily.json'))
            assert filecmp.cmp(filepaths[3], join(folder, 'test_scraper_covidseries.json'))
def test_retrieve_file(self, tmpdir, retrieverfolder, fallback_dir):
    tmpdir = str(tmpdir)
    saved_dir = join(tmpdir, "saved")
    temp_dir = join(tmpdir, "temp")
    rmtree(temp_dir, ignore_errors=True)
    mkdir(temp_dir)
    with Download() as downloader:
        with pytest.raises(ValueError):
            Retrieve(
                downloader,
                fallback_dir,
                saved_dir,
                temp_dir,
                save=True,
                use_saved=True,
            )
        retriever = Retrieve(
            downloader,
            fallback_dir,
            saved_dir,
            temp_dir,
            save=False,
            use_saved=False,
        )
        filename = "test.txt"
        url = join(retrieverfolder, filename)
        path = retriever.retrieve_file(url, filename, logstr="test file", fallback=True)
        assert path == join(temp_dir, filename)
        path = retriever.retrieve_file("NOTEXIST", filename, logstr="test file", fallback=True)
        assert path == join(fallback_dir, filename)
        with pytest.raises(DownloadError):
            retriever.retrieve_file("NOTEXIST", filename, fallback=False)
        with pytest.raises(DownloadError):
            long_url = "".join(
                random.SystemRandom().choice(string.ascii_uppercase + string.digits)
                for _ in range(150))
            retriever.retrieve_file(long_url, filename, fallback=False)
        text = retriever.retrieve_text(url, filename, logstr="test file", fallback=False)
        assert text == "hello"
        text = retriever.retrieve_text("NOTEXIST", filename, logstr="test file", fallback=True)
        assert text == "goodbye"
        with pytest.raises(DownloadError):
            retriever.retrieve_text("NOTEXIST", filename, fallback=False)
        filename = "test.yaml"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_yaml(url, filename, logstr="test file", fallback=False)
        assert data["param_1"] == "ABC"
        data = retriever.retrieve_yaml("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["param_1"] == "XYZ"
        with pytest.raises(DownloadError):
            retriever.retrieve_yaml("NOTEXIST", filename, fallback=False)
        filename = "test.json"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_json(url, filename, logstr="test file", fallback=False)
        assert data["my_param"] == "abc"
        data = retriever.retrieve_json("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["my_param"] == "xyz"
        with pytest.raises(DownloadError):
            retriever.retrieve_json("NOTEXIST", filename, fallback=False)
        retriever = Retrieve(
            downloader,
            fallback_dir,
            saved_dir,
            temp_dir,
            save=True,
            use_saved=False,
        )
        filename = "test.txt"
        url = join(retrieverfolder, filename)
        path = retriever.retrieve_file(url, filename, logstr="test file", fallback=True)
        assert path == join(saved_dir, filename)
        path = retriever.retrieve_file("NOTEXIST", filename, logstr="test file", fallback=True)
        assert path == join(fallback_dir, filename)
        with pytest.raises(DownloadError):
            retriever.retrieve_file("NOTEXIST", filename, fallback=False)
        text = retriever.retrieve_text(url, filename, logstr="test file", fallback=False)
        assert text == "hello"
        text = retriever.retrieve_text("NOTEXIST", filename, logstr="test file", fallback=True)
        assert text == "goodbye"
        with pytest.raises(DownloadError):
            retriever.retrieve_text("NOTEXIST", filename, fallback=False)
        filename = "test.yaml"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_yaml(url, filename, logstr="test file", fallback=False)
        assert data["param_1"] == "ABC"
        data = retriever.retrieve_yaml("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["param_1"] == "XYZ"
        with pytest.raises(DownloadError):
            retriever.retrieve_yaml("NOTEXIST", filename, fallback=False)
        filename = "test.json"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_json(url, filename, logstr="test file", fallback=False)
        assert data["my_param"] == "abc"
        data = retriever.retrieve_json("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["my_param"] == "xyz"
        with pytest.raises(DownloadError):
            retriever.retrieve_json("NOTEXIST", filename, fallback=False)
        retriever = Retrieve(
            downloader,
            fallback_dir,
            saved_dir,
            temp_dir,
            save=False,
            use_saved=True,
        )
        filename = "test.txt"
        url = join(retrieverfolder, filename)
        path = retriever.retrieve_file(url, filename, logstr="test file", fallback=True)
        assert path == join(saved_dir, filename)
        path = retriever.retrieve_file("NOTEXIST", filename, logstr="test file", fallback=True)
        assert path == join(saved_dir, filename)
        path = retriever.retrieve_file("NOTEXIST", filename, fallback=False)
        assert path == join(saved_dir, filename)
        text = retriever.retrieve_text(url, filename, logstr="test file", fallback=False)
        assert text == "hello"
        text = retriever.retrieve_text("NOTEXIST", filename, logstr="test file", fallback=True)
        assert text == "hello"
        text = retriever.retrieve_text("NOTEXIST", filename, fallback=False)
        assert text == "hello"
        filename = "test.yaml"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_yaml(url, filename, logstr="test file", fallback=False)
        assert data["param_1"] == "ABC"
        data = retriever.retrieve_yaml("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["param_1"] == "ABC"
        data = retriever.retrieve_yaml("NOTEXIST", filename, fallback=False)
        assert data["param_1"] == "ABC"
        filename = "test.json"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_json(url, filename, logstr="test file", fallback=False)
        assert data["my_param"] == "abc"
        data = retriever.retrieve_json("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["my_param"] == "abc"
        data = retriever.retrieve_json("NOTEXIST", filename, fallback=False)
        assert data["my_param"] == "abc"
def test_run(self, configuration, fixtures_dir, input_dir):
    with temp_dir('TestWFPFood', delete_on_success=True, delete_on_failure=False) as tempdir:
        with Download(user_agent='test') as downloader:
            retriever = Retrieve(downloader, tempdir, input_dir, tempdir,
                                 save=False, use_saved=True)
            wfp = WFPFood(configuration, None, retriever)
            countries = wfp.get_countries()
            assert len(countries) == 291
            assert countries[100:102] == [
                {'iso3': 'GTM', 'name': 'Guatemala'},
                {'iso3': 'GUF', 'name': 'French Guiana'}]
            wfp.build_mappings()
            dataset, showcase, qc_indicators = wfp.generate_dataset_and_showcase(
                'COG', tempdir)
            assert dataset == {
                'name': 'wfp-food-prices-for-congo',
                'title': 'Congo - Food Prices',
                'maintainer': 'f1921552-8c3e-47e9-9804-579b14a83ee3',
                'owner_org': '3ecac442-7fed-448d-8f78-b385ef6f84e7',
                'data_update_frequency': '7',
                'groups': [{'name': 'cog'}],
                'subnational': '1',
                'tags': [
                    {'name': 'commodities', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'prices', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'markets', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'hxl', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'}],
                'dataset_source': 'CARITAS, GOV, Gvt, National Institute Of Statistics (INS), WFP',
                'dataset_date': '[2011-01-15T00:00:00 TO 2020-07-15T00:00:00]'}
            assert showcase == {
                'name': 'wfp-food-prices-for-congo-showcase',
                'title': 'Congo - Food Prices showcase',
                'notes': 'Congo food prices data from World Food Programme displayed through VAM Economic Explorer',
                'url': 'http://dataviz.vam.wfp.org/economic_explorer/prices?iso3=COG',
                'image_url': 'http://dataviz.vam.wfp.org/_images/home/3_economic.jpg',
                'tags': [
                    {'name': 'commodities', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'prices', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'markets', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'hxl', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'}]}
            assert qc_indicators == [
                {'code': 'Brazzaville-Brazzaville-Total-Groundnuts (shelled)-KG-XAF',
                 'title': 'Price of Groundnuts (shelled) in Total',
                 'unit': 'Currency XAF',
                 'description': 'Price of Groundnuts (shelled) (XAF/KG) in Brazzaville/Total',
                 'code_col': '#meta+code',
                 'value_col': '#value',
                 'date_col': '#date'},
                {'code': 'Point-Noire-Pointe-Noire-Grand marché/Fond Ntié-Ntié/Nkouikou-Oil (vegetable)-L-XAF',
                 'title': 'Price of Oil (vegetable) in Grand marché/Fond Ntié-Ntié/Nkouikou',
                 'unit': 'Currency XAF',
                 'description': 'Price of Oil (vegetable) (XAF/L) in Point-Noire/Pointe-Noire/Grand marché/Fond Ntié-Ntié/Nkouikou',
                 'code_col': '#meta+code',
                 'value_col': '#value',
                 'date_col': '#date'},
                {'code': 'Pool-Kinkala-Kinkala-Rice (mixed, low quality)-KG-XAF',
                 'title': 'Price of Rice (mixed, low quality) in Kinkala',
                 'unit': 'Currency XAF',
                 'description': 'Price of Rice (mixed, low quality) (XAF/KG) in Pool/Kinkala',
                 'code_col': '#meta+code',
                 'value_col': '#value',
                 'date_col': '#date'}]
            for filename in ('wfp_food_prices_cog', 'wfp_food_prices_cog_qc'):
                csv_filename = f'{filename}.csv'
                expected_file = join(fixtures_dir, csv_filename)
                actual_file = join(tempdir, csv_filename)
                assert_files_same(expected_file, actual_file)