示例#1
0
 def test_get_path_for_url(self, fixtureurl, configfolder,
                           downloaderfolder):
     """get_path_for_url should join the target folder with a filename for the url."""
     # Each (folder, expected filename) pair should yield the matching path;
     # the downloader folder case expects a different filename (test_data3.csv).
     for folder, filename in ((configfolder, 'test_data.csv'),
                              (downloaderfolder, 'test_data3.csv')):
         result = Download.get_path_for_url(fixtureurl, folder)
         assert abspath(result) == abspath(join(folder, filename))
 def test_get_url_for_get(self):
     """get_url_for_get should append parameters to an existing query string."""
     url = "http://www.lala.com/hdfa?a=3&b=4"
     expected = "http://www.lala.com/hdfa?a=3&b=4&c=e&d=f"
     # Works with an explicitly ordered mapping...
     assert Download.get_url_for_get(
         url, OrderedDict([("c", "e"), ("d", "f")])) == expected
     # ...and with a plain (insertion-ordered) dict.
     assert Download.get_url_for_get(url, {"c": "e", "d": "f"}) == expected
 def test_hxl_row(self):
     """hxl_row should map each header to its HXL tag ("" when untagged)."""
     headers = ["a", "b", "c"]
     hxltags = {"b": "#b", "c": "#c"}
     # List form: one tag per header, empty string for untagged headers.
     assert Download.hxl_row(headers, hxltags) == ["", "#b", "#c"]
     # Dict form: header -> tag mapping.
     assert Download.hxl_row(headers, hxltags, dict_form=True) == {
         "a": "",
         "b": "#b",
         "c": "#c",
     }
     # Edge cases: no tags at all, and no headers at all.
     # (Literals {} and [] replace the non-idiomatic dict()/list() calls.)
     assert Download.hxl_row(headers, {}) == ["", "", ""]
     assert Download.hxl_row([], hxltags) == []
示例#4
0
 def test_init(self, downloaderfolder):
     """Download() should honour basicauthfile and reject conflicting auth sources."""
     basicauthfile = join(downloaderfolder, 'basicauth.txt')
     # Credentials read from file are installed on the requests session.
     with Download(basicauthfile=basicauthfile) as download:
         assert download.session.auth == ('testuser', 'testpass')
     # Supplying more than one authentication source must raise DownloadError.
     conflicting_kwargs = [
         dict(auth=('u', 'p'), basicauth='Basic xxxxxxxxxxxxxxxx'),
         dict(auth=('u', 'p'), basicauthfile=join('lala', 'lala.txt')),
         dict(basicauth='Basic xxxxxxxxxxxxxxxx',
              basicauthfile=join('lala', 'lala.txt')),
     ]
     for kwargs in conflicting_kwargs:
         with pytest.raises(DownloadError):
             Download(**kwargs)
     # A nonexistent credentials file surfaces as IOError.
     with pytest.raises(IOError):
         Download(basicauthfile='NOTEXIST')
 def test_download_tabular_rows_as_dicts(self, fixtureprocessurl):
     """Rows come back keyed by the first column by default, or by keycolumn."""
     expected_by_first = {
         "coal": {"header2": "3", "header3": "7.4", "header4": "needed"},
         "gas": {"header2": "2", "header3": "6.5", "header4": "n/a"},
     }
     expected_by_second = {
         "2": {"header1": "gas", "header3": "6.5", "header4": "n/a"},
         "3": {"header1": "coal", "header3": "7.4", "header4": "needed"},
     }
     with Download() as downloader:
         # Default: first column becomes the key.
         result = downloader.download_tabular_rows_as_dicts(
             fixtureprocessurl, headers=2)
         self.fix_strings(result)
         assert result == expected_by_first
         # keycolumn=2: second column becomes the key.
         result = downloader.download_tabular_rows_as_dicts(
             fixtureprocessurl, headers=2, keycolumn=2)
         self.fix_strings(result)
         assert result == expected_by_second
示例#6
0
def main(
    output_dir,
    saved_dir,
    save,
    use_saved,
    dportal_params,
    whattorun,
    filterdate,
    **ignore,
):
    """Prepare a clean run-specific output folder and run the IATI viz scraper.

    Extra keyword arguments are accepted and ignored so the caller can pass
    its full argument mapping through unchanged.
    """
    logger.info(f"##### hdx-scraper-iati-viz version {VERSION:.1f} ####")
    config = Configuration.read()
    # Start from a clean, run-specific output directory.
    output_dir = f"{output_dir}_{whattorun}"
    rmtree(output_dir, ignore_errors=True)
    mkdir(output_dir)
    with Download() as downloader:
        retriever = Retrieve(
            downloader,
            config["fallback_dir"],
            f"{saved_dir}_{whattorun}",
            output_dir,
            save,
            use_saved,
        )
        # NOTE(review): utcnow() is naive UTC; presumably downstream expects
        # an ISO timestamp without an offset — confirm before changing.
        start(
            config,
            datetime.utcnow().isoformat(),
            retriever,
            output_dir,
            dportal_params,
            whattorun,
            filterdate,
        )
示例#7
0
def main():
    """Generate dataset and create it in HDX"""

    with Download() as downloader:
        configuration = Configuration.read()
        # Project-specific settings are the configuration keys prefixed "CV".
        project_config = {k: v for k, v in configuration.items()
                          if k.startswith("CV")}
        qc_indicators = configuration.get("qc_indicators", {})
        countries, countriesdata, headers = get_all_countriesdata(
            project_config, downloader)

        logger.info("Number of datasets to upload: %d" % len(countries))
        for info, country in progress_storing_tempdir(
                "UNICEFSAM", countries, "iso3"):
            dataset, showcase, bites_disabled = generate_dataset_and_showcase(
                info["folder"], country, countriesdata[country["iso3"]],
                headers, project_config, qc_indicators)
            if not dataset:
                continue
            dataset.update_from_yaml()
            dataset.generate_resource_view(
                1, bites_disabled=bites_disabled, indicators=qc_indicators)
            dataset.create_in_hdx(
                remove_additional_resources=True,
                hxl_update=False,
                updated_by_script="HDX Scraper: UNICEF Sam",
                batch=info["batch"],
            )
            showcase.create_in_hdx()
            showcase.add_dataset(dataset)
示例#8
0
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    base_url = configuration['base_url']
    with Download(extra_params_yaml=join(expanduser('~'), '.extraparams.yml'),
                  extra_params_lookup=lookup) as downloader:
        # Mount a retrying adapter with large connection pools for both
        # schemes (a fresh adapter per scheme, matching the original).
        for scheme in ('http://', 'https://'):
            downloader.session.mount(
                scheme,
                HTTPAdapter(max_retries=1, pool_connections=100,
                            pool_maxsize=100))
        countries = get_countries(base_url, downloader)
        logger.info('Number of countries: %d' % len(countries))

        for folder, country in progress_storing_tempdir(
                'DHS', countries, 'iso3'):
            tags = get_tags(base_url, downloader, country['dhscode'])
            dataset, subdataset, showcase, bites_disabled = \
                generate_datasets_and_showcase(
                    configuration, base_url, downloader, folder, country,
                    tags)
            if dataset:
                createdataset(dataset)
                resource_view = generate_resource_view(
                    dataset, bites_disabled=bites_disabled['national'])
                resource_view.create_in_hdx()
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
            if subdataset:
                createdataset(subdataset)
                showcase.add_dataset(subdataset)
                subdataset.generate_resource_view(
                    bites_disabled=bites_disabled['subnational'])
示例#9
0
def main():
    """Generate dataset and create it in HDX"""

    # Read the configuration once instead of four separate Configuration.read()
    # calls fetching the same object.
    configuration = Configuration.read()
    filelist_url = configuration['filelist_url']
    country_group_url = configuration['country_group_url']
    dataset_codes = configuration['dataset_codes']
    showcase_base_url = configuration['showcase_base_url']
    with temp_dir('faostat') as folder:
        with Download() as downloader:
            indicatortypes = get_indicatortypesdata(filelist_url, downloader)
            countriesdata = get_countriesdata(country_group_url, downloader)
            logger.info('Number of indicator types to upload: %d' %
                        len(dataset_codes))
            for dataset_code in dataset_codes:
                datasets, showcases = generate_datasets_and_showcases(
                    downloader, folder, dataset_codes[dataset_code],
                    indicatortypes[dataset_code], countriesdata,
                    showcase_base_url)
                logger.info('Number of datasets to upload: %d' % len(datasets))
                for i, dataset in enumerate(datasets):
                    logger.info('Creating dataset: %s' % dataset['title'])
                    dataset.preview_off()
                    dataset.create_in_hdx()
                    # Showcases are positionally parallel to datasets.
                    showcase = showcases[i]
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
示例#10
0
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    download_url = configuration["download_url"]
    with Download() as downloader:
        countries, headers, countriesdata = get_countriesdata(
            download_url, downloader)
        logger.info(f"Number of countries: {len(countriesdata)}")
        for info, country in progress_storing_tempdir(
                "UCDP", countries, "iso3"):
            dataset, showcase = generate_dataset_and_showcase(
                info["folder"], country, countriesdata[country["iso3"]],
                headers)
            if not dataset:
                continue
            dataset.update_from_yaml()
            # Two trailing spaces before "\n" force markdown line breaks.
            dataset["notes"] = dataset["notes"].replace("\n", "  \n")
            dataset.generate_resource_view(1)
            dataset.create_in_hdx(
                remove_additional_resources=True,
                hxl_update=False,
                updated_by_script="HDX Scraper: UCDP",
                batch=info["batch"],
            )
            showcase.create_in_hdx()
            showcase.add_dataset(dataset)
示例#11
0
    def data(self, configuration):
        """Return country data loaded from the local fixtures directory."""
        resources = configuration["resources"]
        # Serve the fixtures over a file:// URI so the downloader can fetch them.
        fixtures_uri = (Path(__file__).resolve().parent / "fixtures").as_uri()
        print(fixtures_uri)
        downloader = Download(user_agent="test")
        return get_countriesdata(fixtures_uri, resources, downloader)
示例#12
0
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    hdro_url = configuration['hdro_url']
    qc_indicators = configuration['qc_indicators']
    with Download() as downloader:
        countriesdata = get_countriesdata(hdro_url, downloader)
        # progress_storing_tempdir expects a list of {'iso3': ...} dicts.
        countries = [{'iso3': countryiso}
                     for countryiso in sorted(countriesdata)]
        logger.info('Number of countries to upload: %d' % len(countries))
        for info, country in progress_storing_tempdir('HDRO', countries,
                                                      'iso3'):
            countryiso = country['iso3']
            dataset, showcase, bites_disabled = generate_dataset_and_showcase(
                info['folder'], countryiso, countriesdata[countryiso],
                qc_indicators)
            if not dataset:
                continue
            dataset.update_from_yaml()
            dataset.generate_resource_view(-1,
                                           bites_disabled=bites_disabled,
                                           indicators=qc_indicators)
            dataset.create_in_hdx(remove_additional_resources=True,
                                  hxl_update=False,
                                  updated_by_script='HDX Scraper: HDRO',
                                  batch=info['batch'])
            showcase.create_in_hdx()
            showcase.add_dataset(dataset)
 def test_get_tabular_rows_as_list(self, fixtureprocessurl):
     """All rows, including both header rows, come back as lists of strings."""
     expected = [
         ["la1", "ha1", "ba1", "ma1"],
         ["header1", "header2", "header3", "header4"],
         ["coal", "3", "7.4", "needed"],
         ["gas", "2", "6.5", "n/a"],
     ]
     with Download() as downloader:
         rows = downloader.get_tabular_rows_as_list(fixtureprocessurl)
         assert list(rows) == expected
 def test_get_url_params_for_post(self):
     """Query-string params should merge with supplied params for POST."""
     url = "http://www.lala.com/hdfa?a=3&b=4"
     expected_params = list(
         OrderedDict([("a", "3"), ("b", "4"), ("c", "e"),
                      ("d", "f")]).items())
     # Works with an explicitly ordered mapping and a plain dict alike.
     for extra in (OrderedDict([("c", "e"), ("d", "f")]),
                   {"c": "e", "d": "f"}):
         full_url, params = Download.get_url_params_for_post(url, extra)
         assert full_url == "http://www.lala.com/hdfa"
         assert list(params.items()) == expected_params
示例#15
0
 def test_get_indicators(self, configuration, folder):
     """Running selected scrapers should produce the expected JSON output."""
     with temp_dir('TestCovidViz') as tempdir:
         with Download(user_agent='test') as downloader:
             tabs = configuration['tabs']
             # Only the JSON output is inspected; gsheets/excel are no-ops.
             noout = nooutput(tabs)
             jsonout = jsonoutput(configuration, tabs)
             outputs = {'gsheets': noout, 'excel': noout, 'json': jsonout}
             get_indicators(configuration, downloader, outputs, tabs,
                            scrapers=['ifi', 'who', 'covid_trend'])
             filepath = jsonout.save(tempdir)
             assert_files_same(filepath, join(folder, 'test_tabular.json'))
 def test_download_tabular_key_value(self, fixtureurl, fixtureprocessurl):
     """First two columns become key/value pairs; bad urls raise DownloadError."""
     with Download() as downloader:
         # With no headers argument, the first row is data too.
         assert downloader.download_tabular_key_value(
             fixtureurl, file_type="csv") == {
                 "615": "2231RTA",
                 "GWNO": "EVENT_ID_CNTY",
             }
         # headers=2: the second row supplies headers; data rows follow.
         assert downloader.download_tabular_key_value(
             fixtureprocessurl, headers=2) == {"coal": "3", "gas": "2"}
         with pytest.raises(DownloadError):
             downloader.download_tabular_key_value("NOTEXIST://NOTEXIST.csv")