def test_get_path_for_url(self, fixtureurl, configfolder, downloaderfolder):
    """get_path_for_url places the download inside the requested folder.

    The second folder already holds a file of the same name, so a numbered
    variant (test_data3.csv) is produced there.
    """
    for folder, expected_filename in (
        (configfolder, 'test_data.csv'),
        (downloaderfolder, 'test_data3.csv'),
    ):
        result = Download.get_path_for_url(fixtureurl, folder)
        assert abspath(result) == abspath(join(folder, expected_filename))
def test_get_url_for_get(self):
    """Extra GET parameters are appended to a URL's existing query string."""
    base = "http://www.lala.com/hdfa?a=3&b=4"
    expected = "http://www.lala.com/hdfa?a=3&b=4&c=e&d=f"
    # Explicitly ordered mapping
    ordered = OrderedDict([("c", "e"), ("d", "f")])
    assert Download.get_url_for_get(base, ordered) == expected
    # Plain dict (insertion-ordered) gives the same result
    assert Download.get_url_for_get(base, {"c": "e", "d": "f"}) == expected
def test_hxl_row(self):
    """hxl_row maps each header to its HXL tag, blank where no tag exists."""
    headers = ["a", "b", "c"]
    hxltags = {"b": "#b", "c": "#c"}
    # List form: untagged headers become empty strings
    assert Download.hxl_row(headers, hxltags) == ["", "#b", "#c"]
    # Dict form: header -> tag mapping
    expected_mapping = {"a": "", "b": "#b", "c": "#c"}
    assert Download.hxl_row(headers, hxltags, dict_form=True) == expected_mapping
    # No tags at all -> all blanks; no headers -> empty row
    assert Download.hxl_row(headers, {}) == ["", "", ""]
    assert Download.hxl_row([], hxltags) == []
def test_init(self, downloaderfolder):
    """Download accepts exactly one auth source and validates the auth file."""
    basicauthfile = join(downloaderfolder, 'basicauth.txt')
    with Download(basicauthfile=basicauthfile) as download:
        assert download.session.auth == ('testuser', 'testpass')
    # Supplying more than one auth mechanism is an error
    otherfile = join('lala', 'lala.txt')
    with pytest.raises(DownloadError):
        Download(auth=('u', 'p'), basicauth='Basic xxxxxxxxxxxxxxxx')
    with pytest.raises(DownloadError):
        Download(auth=('u', 'p'), basicauthfile=otherfile)
    with pytest.raises(DownloadError):
        Download(basicauth='Basic xxxxxxxxxxxxxxxx', basicauthfile=otherfile)
    # A missing auth file surfaces as IOError
    with pytest.raises(IOError):
        Download(basicauthfile='NOTEXIST')
def test_download_tabular_rows_as_dicts(self, fixtureprocessurl):
    """Rows keyed by the first column by default, or by an explicit keycolumn."""
    with Download() as downloader:
        # Default: first column is the key
        result = downloader.download_tabular_rows_as_dicts(
            fixtureprocessurl, headers=2)
        self.fix_strings(result)
        assert result == {
            "coal": {"header2": "3", "header3": "7.4", "header4": "needed"},
            "gas": {"header2": "2", "header3": "6.5", "header4": "n/a"},
        }
        # keycolumn=2: second column becomes the key
        result = downloader.download_tabular_rows_as_dicts(
            fixtureprocessurl, headers=2, keycolumn=2)
        self.fix_strings(result)
        assert result == {
            "2": {"header1": "gas", "header3": "6.5", "header4": "n/a"},
            "3": {"header1": "coal", "header3": "7.4", "header4": "needed"},
        }
def main(
    output_dir,
    saved_dir,
    save,
    use_saved,
    dportal_params,
    whattorun,
    filterdate,
    **ignore,
):
    """Recreate the run's output folder, then fetch and process IATI data."""
    logger.info(f"##### hdx-scraper-iati-viz version {VERSION:.1f} ####")
    configuration = Configuration.read()
    # Fresh per-run output directory, suffixed by what is being run
    run_dir = f"{output_dir}_{whattorun}"
    rmtree(run_dir, ignore_errors=True)
    mkdir(run_dir)
    with Download() as downloader:
        retriever = Retrieve(
            downloader,
            configuration["fallback_dir"],
            f"{saved_dir}_{whattorun}",
            run_dir,
            save,
            use_saved,
        )
        # NOTE(review): naive UTC timestamp; datetime.utcnow() is deprecated
        # from Python 3.12 — consider datetime.now(timezone.utc) if the
        # "+00:00" suffix in isoformat output is acceptable downstream.
        today = datetime.utcnow().isoformat()
        start(
            configuration,
            today,
            retriever,
            run_dir,
            dportal_params,
            whattorun,
            filterdate,
        )
def main():
    """Generate dataset and create it in HDX"""
    with Download() as downloader:
        config = Configuration.read()
        # Only the CV* keys belong to this project's configuration
        project_config = {k: v for k, v in config.items() if k.startswith("CV")}
        qc_indicators = config.get("qc_indicators", {})
        countries, countriesdata, headers = get_all_countriesdata(
            project_config, downloader)
        logger.info("Number of datasets to upload: %d" % len(countries))
        for info, country in progress_storing_tempdir(
                "UNICEFSAM", countries, "iso3"):
            iso3 = country["iso3"]
            dataset, showcase, bites_disabled = generate_dataset_and_showcase(
                info["folder"],
                country,
                countriesdata[iso3],
                headers,
                project_config,
                qc_indicators,
            )
            if not dataset:
                continue
            dataset.update_from_yaml()
            dataset.generate_resource_view(
                1, bites_disabled=bites_disabled, indicators=qc_indicators)
            dataset.create_in_hdx(
                remove_additional_resources=True,
                hxl_update=False,
                updated_by_script="HDX Scraper: UNICEF Sam",
                batch=info["batch"],
            )
            showcase.create_in_hdx()
            showcase.add_dataset(dataset)
def main():
    """Generate dataset and create it in HDX"""
    configuration = Configuration.read()
    base_url = configuration['base_url']
    with Download(extra_params_yaml=join(expanduser('~'), '.extraparams.yml'),
                  extra_params_lookup=lookup) as downloader:
        # Larger connection pools for the many per-country requests
        for scheme in ('http://', 'https://'):
            downloader.session.mount(
                scheme,
                HTTPAdapter(max_retries=1, pool_connections=100,
                            pool_maxsize=100))
        countries = get_countries(base_url, downloader)
        logger.info('Number of countries: %d' % len(countries))
        for folder, country in progress_storing_tempdir(
                'DHS', countries, 'iso3'):
            tags = get_tags(base_url, downloader, country['dhscode'])
            dataset, subdataset, showcase, bites_disabled = \
                generate_datasets_and_showcase(configuration, base_url,
                                               downloader, folder, country,
                                               tags)
            if not dataset:
                continue
            createdataset(dataset)
            resource_view = generate_resource_view(
                dataset, bites_disabled=bites_disabled['national'])
            resource_view.create_in_hdx()
            showcase.create_in_hdx()
            showcase.add_dataset(dataset)
            # Subnational dataset is optional
            if subdataset:
                createdataset(subdataset)
                showcase.add_dataset(subdataset)
                subdataset.generate_resource_view(
                    bites_disabled=bites_disabled['subnational'])
def main():
    """Generate dataset and create it in HDX"""
    # Read the configuration once instead of four separate Configuration.read()
    # calls — same values, less repeated work.
    configuration = Configuration.read()
    filelist_url = configuration['filelist_url']
    country_group_url = configuration['country_group_url']
    dataset_codes = configuration['dataset_codes']
    showcase_base_url = configuration['showcase_base_url']
    with temp_dir('faostat') as folder:
        with Download() as downloader:
            indicatortypes = get_indicatortypesdata(filelist_url, downloader)
            countriesdata = get_countriesdata(country_group_url, downloader)
            logger.info('Number of indicator types to upload: %d'
                        % len(dataset_codes))
            for dataset_code in dataset_codes:
                datasets, showcases = generate_datasets_and_showcases(
                    downloader, folder, dataset_codes[dataset_code],
                    indicatortypes[dataset_code], countriesdata,
                    showcase_base_url)
                logger.info('Number of datasets to upload: %d' % len(datasets))
                for i, dataset in enumerate(datasets):
                    logger.info('Creating dataset: %s' % dataset['title'])
                    # Disable preview before creation, then attach the
                    # matching showcase by index.
                    dataset.preview_off()
                    dataset.create_in_hdx()
                    showcase = showcases[i]
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
def main():
    """Generate dataset and create it in HDX"""
    configuration = Configuration.read()
    download_url = configuration["download_url"]
    with Download() as downloader:
        countries, headers, countriesdata = get_countriesdata(
            download_url, downloader)
        logger.info(f"Number of countries: {len(countriesdata)}")
        for info, country in progress_storing_tempdir(
                "UCDP", countries, "iso3"):
            dataset, showcase = generate_dataset_and_showcase(
                info["folder"], country, countriesdata[country["iso3"]],
                headers)
            if not dataset:
                continue
            dataset.update_from_yaml()
            # ensure markdown has line breaks
            dataset["notes"] = dataset["notes"].replace("\n", " \n")
            dataset.generate_resource_view(1)
            dataset.create_in_hdx(
                remove_additional_resources=True,
                hxl_update=False,
                updated_by_script="HDX Scraper: UCDP",
                batch=info["batch"],
            )
            showcase.create_in_hdx()
            showcase.add_dataset(dataset)
def data(self, configuration):
    """Fixture: countries data loaded from the local fixtures directory.

    Builds a file:// URI for the fixtures folder next to this test file and
    feeds it to get_countriesdata with a test-agent Download.
    """
    resources = configuration["resources"]
    download_url = (Path(__file__).resolve().parent / "fixtures").as_uri()
    # Removed leftover debug print(download_url) that polluted test output.
    return get_countriesdata(
        download_url, resources, Download(user_agent="test"))
def main():
    """Generate dataset and create it in HDX"""
    configuration = Configuration.read()
    hdro_url = configuration['hdro_url']
    qc_indicators = configuration['qc_indicators']
    with Download() as downloader:
        countriesdata = get_countriesdata(hdro_url, downloader)
        # progress_storing_tempdir wants a sorted list of iso3 dicts
        countries = [{'iso3': iso3} for iso3 in sorted(countriesdata)]
        logger.info('Number of countries to upload: %d' % len(countries))
        for info, country in progress_storing_tempdir(
                'HDRO', countries, 'iso3'):
            countryiso = country['iso3']
            dataset, showcase, bites_disabled = generate_dataset_and_showcase(
                info['folder'], countryiso, countriesdata[countryiso],
                qc_indicators)
            if not dataset:
                continue
            dataset.update_from_yaml()
            dataset.generate_resource_view(
                -1, bites_disabled=bites_disabled, indicators=qc_indicators)
            dataset.create_in_hdx(
                remove_additional_resources=True,
                hxl_update=False,
                updated_by_script='HDX Scraper: HDRO',
                batch=info['batch'])
            showcase.create_in_hdx()
            showcase.add_dataset(dataset)
def test_get_tabular_rows_as_list(self, fixtureprocessurl):
    """Every row, including both header rows, comes back as a list."""
    expected = [
        ["la1", "ha1", "ba1", "ma1"],
        ["header1", "header2", "header3", "header4"],
        ["coal", "3", "7.4", "needed"],
        ["gas", "2", "6.5", "n/a"],
    ]
    with Download() as downloader:
        assert list(
            downloader.get_tabular_rows_as_list(fixtureprocessurl)) == expected
def test_get_url_params_for_post(self):
    """Query-string parameters merge with supplied ones for a POST."""
    full_url = "http://www.lala.com/hdfa?a=3&b=4"
    expected_url = "http://www.lala.com/hdfa"
    expected_items = [("a", "3"), ("b", "4"), ("c", "e"), ("d", "f")]
    # Both ordered and plain dict parameter mappings behave identically
    for params in (OrderedDict([("c", "e"), ("d", "f")]),
                   {"c": "e", "d": "f"}):
        url, merged = Download.get_url_params_for_post(full_url, params)
        assert url == expected_url
        assert list(merged.items()) == expected_items
def test_get_indicators(self, configuration, folder):
    """Selected scrapers produce JSON identical to the stored fixture."""
    with temp_dir('TestCovidViz') as tempdir, \
            Download(user_agent='test') as downloader:
        tabs = configuration['tabs']
        noout = nooutput(tabs)
        jsonout = jsonoutput(configuration, tabs)
        # Only the JSON output is checked; the others are no-ops
        outputs = {'gsheets': noout, 'excel': noout, 'json': jsonout}
        get_indicators(configuration, downloader, outputs, tabs,
                       scrapers=['ifi', 'who', 'covid_trend'])
        filepath = jsonout.save(tempdir)
        assert_files_same(filepath, join(folder, 'test_tabular.json'))
def test_download_tabular_key_value(self, fixtureurl, fixtureprocessurl):
    """First two columns become key/value pairs; bad URLs raise DownloadError."""
    with Download() as downloader:
        assert downloader.download_tabular_key_value(
            fixtureurl,
            file_type="csv") == {"615": "2231RTA", "GWNO": "EVENT_ID_CNTY"}
        assert downloader.download_tabular_key_value(
            fixtureprocessurl, headers=2) == {"coal": "3", "gas": "2"}
        # Unresolvable scheme/host must raise
        with pytest.raises(DownloadError):
            downloader.download_tabular_key_value("NOTEXIST://NOTEXIST.csv")