Example #1
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    resources = configuration["resources"]
    fields = configuration["fields"]
    source_directory = configuration["source_directory"]
    download_url = Path(source_directory).resolve().as_uri()

    with Download() as downloader:
        countries, headers, countriesdata = get_countriesdata(
            download_url, resources, downloader)
        logger.info("Number of countries: %d" % len(countriesdata))
        for info, country in progress_storing_tempdir("UNHCR_population",
                                                      countries, "iso3"):
            folder = info["folder"]

            dataset, showcase = generate_dataset_and_showcase(
                folder, country, countriesdata[country["iso3"]], headers,
                resources, fields)
            if dataset:
                dataset.update_from_yaml()
                dataset["notes"] = dataset["notes"].replace(
                    "\n", "  \n")  # ensure markdown has line breaks
                dataset.generate_resource_view(1)
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: UNHCR population",
                    batch=info["batch"],
                )
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #2
def main():
    """Generate dataset and create it in HDX"""

    with Download() as downloader:
        configuration = Configuration.read()
        countries_path = join('config', configuration['countries_filename'])
        indicators_url = configuration['indicators_url']
        mvam_url = configuration['mvam_url']
        showcase_url = configuration['showcase_url']
        countries = get_countries(countries_path, downloader)
        variables = get_mvamvariables(indicators_url, downloader)
        logger.info('Number of datasets to upload: %d' % len(countries))
        for info, country in progress_storing_tempdir('WFPFoodSecurity',
                                                      countries, 'iso3'):
            dataset, showcase, bites_disabled = \
                generate_dataset_and_showcase(mvam_url, showcase_url, downloader, info['folder'],
                                              country, variables)
            if dataset:
                dataset.update_from_yaml()
                dataset.generate_resource_view(bites_disabled=bites_disabled)
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script='HDX Scraper: WFP Food Security',
                    batch=info['batch'])
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #3
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    indicators = configuration["indicators"]
    json_url = configuration["json_url"]
    with Download() as downloader:
        indicators_metadata = get_indicators_metadata(json_url, downloader,
                                                      indicators)
        countriesdata, countries = get_countriesdata(json_url, downloader,
                                                     indicators)
        logger.info(f"Number of countries to upload: {len(countries)}")

        for info, country in progress_storing_tempdir("WorldPop", countries,
                                                      "iso3"):
            countryiso = country["iso3"]
            datasets, showcases = generate_datasets_and_showcases(
                downloader, countryiso, indicators_metadata,
                countriesdata[countryiso])
            for dataset in datasets:
                dataset.update_from_yaml()
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: WorldPop",
                    batch=info["batch"],
                )
                for showcase in showcases.get(dataset["name"], list()):
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
Example #4
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    base_url = configuration['base_url']
    with Download(extra_params_yaml=join(expanduser('~'), '.extraparams.yml'),
                  extra_params_lookup=lookup) as downloader:
        downloader.session.mount(
            'http://',
            HTTPAdapter(max_retries=1, pool_connections=100, pool_maxsize=100))
        downloader.session.mount(
            'https://',
            HTTPAdapter(max_retries=1, pool_connections=100, pool_maxsize=100))
        countries = get_countries(base_url, downloader)
        logger.info('Number of countries: %d' % len(countries))

        for folder, country in progress_storing_tempdir(
                'DHS', countries, 'iso3'):
            tags = get_tags(base_url, downloader, country['dhscode'])
            dataset, subdataset, showcase, bites_disabled = \
                generate_datasets_and_showcase(configuration, base_url, downloader, folder, country, tags)
            if dataset:
                createdataset(dataset)
                resource_view = generate_resource_view(
                    dataset, bites_disabled=bites_disabled['national'])
                resource_view.create_in_hdx()
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
            if subdataset:
                createdataset(subdataset)
                showcase.add_dataset(subdataset)
                subdataset.generate_resource_view(
                    bites_disabled=bites_disabled['subnational'])
Example #5
def main(save, use_saved, **ignore):
    """Generate dataset and create it in HDX"""

    with Download(extra_params_yaml=join(expanduser('~'), '.extraparams.yml'),
                  extra_params_lookup=lookup) as token_downloader:
        configuration = Configuration.read()
        with Download() as downloader:
            folder = temp_dir(lookup)
            retriever = Retrieve(downloader, folder, 'saved_data', folder,
                                 save, use_saved)
            wfp = WFPFood(configuration, token_downloader, retriever)
            countries = wfp.get_countries()
            logger.info('Number of country datasets to upload: %d' %
                        len(countries))
            wfp.build_mappings()
            for info, country in progress_storing_tempdir(
                    lookup, countries, 'iso3'):

                dataset, showcase, qc_indicators = wfp.generate_dataset_and_showcase(
                    country['iso3'], info['folder'])
                if dataset:
                    dataset.update_from_yaml()
                    # Fill the %s placeholder in the notes template with the country name
                    dataset['notes'] = dataset['notes'] % (
                        'Food Prices data for %s. Food prices data comes from '
                        'the World Food Programme and covers' % country['name'])
                    dataset.generate_resource_view(-1,
                                                   indicators=qc_indicators)
                    dataset.create_in_hdx(
                        remove_additional_resources=True,
                        hxl_update=False,
                        updated_by_script='HDX Scraper: WFP Food Prices',
                        batch=info['batch'])
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
Example #6
def main():
    """Generate dataset and create it in HDX"""

    with Download() as downloader:
        config = Configuration.read()
        project_config = {
            key: value
            for key, value in config.items() if key.startswith("CV")
        }
        qc_indicators = config.get("qc_indicators", {})
        countries, countriesdata, headers = get_all_countriesdata(
            project_config, downloader)

        logger.info("Number of datasets to upload: %d" % len(countries))
        for info, country in progress_storing_tempdir("UNICEFSAM", countries,
                                                      "iso3"):
            dataset, showcase, bites_disabled = generate_dataset_and_showcase(
                info["folder"], country, countriesdata[country["iso3"]],
                headers, project_config, qc_indicators)
            if dataset:
                dataset.update_from_yaml()
                dataset.generate_resource_view(1,
                                               bites_disabled=bites_disabled,
                                               indicators=qc_indicators)
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: UNICEF Sam",
                    batch=info["batch"],
                )
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #7
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    download_url = configuration["download_url"]
    with Download() as downloader:
        countries, headers, countriesdata = get_countriesdata(download_url, downloader)
        logger.info(f"Number of countries: {len(countriesdata)}")
        for info, country in progress_storing_tempdir("UCDP", countries, "iso3"):
            folder = info["folder"]
            dataset, showcase = generate_dataset_and_showcase(
                folder, country, countriesdata[country["iso3"]], headers
            )
            if dataset:
                dataset.update_from_yaml()
                dataset["notes"] = dataset["notes"].replace(
                    "\n", "  \n"
                )  # ensure markdown has line breaks
                dataset.generate_resource_view(1)
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: UCDP",
                    batch=info["batch"],
                )
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #8
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    hdro_url = configuration['hdro_url']
    qc_indicators = configuration['qc_indicators']
    with Download() as downloader:
        countriesdata = get_countriesdata(hdro_url, downloader)
        countries = [{
            'iso3': countryiso
        } for countryiso in sorted(countriesdata.keys())]
        logger.info('Number of countries to upload: %d' % len(countries))
        for info, country in progress_storing_tempdir('HDRO', countries,
                                                      'iso3'):
            countryiso = country['iso3']
            countrydata = countriesdata[countryiso]
            dataset, showcase, bites_disabled = generate_dataset_and_showcase(
                info['folder'], countryiso, countrydata, qc_indicators)
            if dataset:
                dataset.update_from_yaml()
                dataset.generate_resource_view(-1,
                                               bites_disabled=bites_disabled,
                                               indicators=qc_indicators)
                dataset.create_in_hdx(remove_additional_resources=True,
                                      hxl_update=False,
                                      updated_by_script='HDX Scraper: HDRO',
                                      batch=info['batch'])
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #9
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    download_url = configuration['download_url']
    with Download() as downloader:
        countries, headers, countriesdata = get_countriesdata(download_url, downloader)
        logger.info('Number of countries: %d' % len(countriesdata))
        for folder, country in progress_storing_tempdir('UCDP', countries, 'iso3'):
            dataset, showcase = generate_dataset_and_showcase(folder, country, countriesdata[country['iso3']], headers)
            if dataset:
                dataset.update_from_yaml()
                dataset['notes'] = dataset['notes'].replace('\n', '  \n')  # ensure markdown has line breaks
                dataset.generate_resource_view(1)
                dataset.create_in_hdx(remove_additional_resources=True, hxl_update=False, updated_by_script='HDX Scraper: UCDP')
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #10
def main():
    """Generate dataset and create it in HDX"""

    configuration = Configuration.read()
    base_url = configuration['base_url']
    countries_url = configuration['countries_url']
    with Download() as downloader:
        countries = get_countries(countries_url, downloader)
        logger.info('Number of datasets to upload: %d' % len(countries))
        for info, country in progress_storing_tempdir('ACLED', sorted(countries, key=lambda x: x['iso3']), 'iso3'):
            folder = info['folder']
            dataset, showcase = generate_dataset_and_showcase(base_url, downloader, folder, country)
            if dataset:
                dataset.update_from_yaml()
                dataset['license_other'] = dataset['license_other'].replace('\n', '  \n')  # ensure markdown has line breaks
                dataset.generate_resource_view(1)
                dataset.create_in_hdx(remove_additional_resources=True, hxl_update=False, updated_by_script='HDX Scraper: ACLED', batch=info['batch'])
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
Example #11
def main(output_failures=False, **ignore):
    configuration = Configuration.read()
    with Download() as downloader:
        dataset_ids = get_dataset_ids(configuration, downloader)
        logger.info(f"Number of datasets to upload: {len(dataset_ids)}")
        for info, dataset_id in progress_storing_tempdir(
            "UNHCR-MICRODATA", dataset_ids, "id"
        ):
            dataset = generate_dataset(
                dataset_id["id"], configuration, downloader, output_failures
            )
            if dataset:
                dataset.update_from_yaml()
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: UNHCR microdata",
                    batch=info["batch"],
                )
        # failures is assumed to be a module-level list populated by generate_dataset
        for failure in failures:
            logger.error(failure)
Example #12
    def test_progress_storing_tempdir(self, monkeypatch):
        tempfolder = "papa"
        expected_dir = join(gettempdir(), tempfolder)
        rmtree(expected_dir, ignore_errors=True)
        iterator = [
            {"iso3": "AFG", "name": "Afghanistan"},
            {"iso3": "SDN", "name": "Sudan"},
            {"iso3": "YEM", "name": "Yemen"},
            {"iso3": "ZAM", "name": "Zambia"},
        ]
        expected_batch_file = join(expected_dir, "batch.txt")
        result = list()
        for info, nextdict in progress_storing_tempdir(tempfolder, iterator,
                                                       "iso3"):
            assert info["folder"] == expected_dir
            expected_batch = load_file_to_str(expected_batch_file, strip=True)
            result.append(nextdict)
        assert result == iterator
        assert expected_batch == info["batch"]
        assert exists(expected_dir) is False

        monkeypatch.setenv("WHERETOSTART", "iso3=SDN")
        result = list()
        for info, nextdict in progress_storing_tempdir(tempfolder, iterator,
                                                       "iso3"):
            assert exists(info["folder"]) is True
            assert info["folder"] == expected_dir
            expected_batch = load_file_to_str(expected_batch_file, strip=True)
            result.append(nextdict)
        assert result == iterator[1:]
        assert expected_batch == info["batch"]
        assert exists(expected_dir) is False
        monkeypatch.delenv("WHERETOSTART")

        try:
            for info, nextdict in progress_storing_tempdir(
                    tempfolder, iterator, "iso3"):
                if nextdict["iso3"] == "YEM":
                    start_batch = info["batch"]
                    raise ValueError("Problem!")
        except ValueError:
            pass
        assert exists(expected_dir) is True
        result = list()
        for info, nextdict in progress_storing_tempdir(tempfolder, iterator,
                                                       "iso3"):
            assert exists(info["folder"]) is True
            assert info["folder"] == expected_dir
            assert info["batch"] == start_batch
            result.append(nextdict)
        assert result == iterator[2:]
        assert exists(expected_dir) is False

        try:
            for info, nextdict in progress_storing_tempdir(
                    tempfolder, iterator, "iso3"):
                if nextdict["iso3"] == "YEM":
                    start_batch = info["batch"]
                    raise ValueError("Problem!")
        except ValueError:
            pass
        assert exists(expected_dir) is True
        monkeypatch.setenv("WHERETOSTART", "RESET")
        result = list()
        for info, nextdict in progress_storing_tempdir(tempfolder, iterator,
                                                       "iso3"):
            assert exists(info["folder"]) is True
            assert info["folder"] == expected_dir
            assert info["batch"] != start_batch
            result.append(nextdict)
        assert result == iterator
        assert exists(expected_dir) is False
        monkeypatch.delenv("WHERETOSTART")

        try:
            for info, nextdict in progress_storing_tempdir(
                    tempfolder, iterator, "iso3"):
                if nextdict["iso3"] == "YEM":
                    start_batch = info["batch"]
                    raise ValueError("Problem!")
        except ValueError:
            pass
        assert exists(expected_dir) is True
        monkeypatch.setenv("WHERETOSTART", "iso3=SDN")
        result = list()
        for info, nextdict in progress_storing_tempdir(tempfolder, iterator,
                                                       "iso3"):
            assert exists(info["folder"]) is True
            assert info["folder"] == expected_dir
            assert info["batch"] == start_batch
            result.append(nextdict)
        assert result == iterator[1:]
        assert exists(expected_dir) is False
        monkeypatch.delenv("WHERETOSTART")

        try:
            for info, nextdict in progress_storing_tempdir(
                    tempfolder, iterator, "iso3"):
                if nextdict["iso3"] == "YEM":
                    start_batch = info["batch"]
                    raise ValueError("Problem!")
        except ValueError:
            pass
        monkeypatch.setenv("WHERETOSTART", "iso3=NOTFOUND")
        found = False
        for _ in progress_storing_tempdir(tempfolder, iterator, "iso3"):
            found = True
        assert found is False
        assert exists(expected_dir) is True
        batch = load_file_to_str(expected_batch_file, strip=True)
        assert batch == start_batch
        monkeypatch.delenv("WHERETOSTART")

        monkeypatch.setenv("WHERETOSTART", "NOTFOUND=SDN")
        found = False
        for _ in progress_storing_tempdir(tempfolder, iterator, "iso3"):
            found = True
        assert found is False
        assert exists(expected_dir) is True
        batch = load_file_to_str(expected_batch_file, strip=True)
        assert batch == start_batch
        monkeypatch.delenv("WHERETOSTART")

        rmtree(expected_dir, ignore_errors=True)
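All of the examples above share the same skeleton: progress_storing_tempdir yields an info dict (a "folder" temp directory plus a "batch" id that survives interrupted runs, as the test above exercises) together with the next item from the iterator, and the scraper builds and uploads one dataset per item. Below is a minimal sketch of that pattern, assuming the hdx-python-api / hdx-python-utilities import paths used at the time these examples were written; generate_dataset is a hypothetical per-scraper helper standing in for the generate_dataset_and_showcase functions above.

import logging

from hdx.hdx_configuration import Configuration
from hdx.utilities.downloader import Download
from hdx.utilities.path import progress_storing_tempdir

logger = logging.getLogger(__name__)


def main():
    """Sketch: one dataset per country, with resumable progress"""

    configuration = Configuration.read()
    with Download() as downloader:
        # Normally built from a source; hardcoded here for illustration
        countries = [{"iso3": "AFG"}, {"iso3": "SDN"}]
        logger.info("Number of countries: %d" % len(countries))
        # Progress is stored in a temp folder keyed on the first argument; an
        # interrupted run resumes from the stored iso3, or from the
        # WHERETOSTART environment variable (e.g. WHERETOSTART=iso3=SDN)
        for info, country in progress_storing_tempdir("MyScraper", countries, "iso3"):
            dataset = generate_dataset(downloader, info["folder"], country)  # hypothetical helper
            if dataset:
                dataset.update_from_yaml()
                dataset.create_in_hdx(
                    remove_additional_resources=True,
                    hxl_update=False,
                    updated_by_script="HDX Scraper: MyScraper",
                    batch=info["batch"],  # same batch id across resumed runs
                )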