Example #1
def main(save, use_saved, **ignore):
    """Generate dataset and create it in HDX"""

    with Download(extra_params_yaml=join(expanduser('~'), '.extraparams.yml'),
                  extra_params_lookup=lookup) as token_downloader:
        configuration = Configuration.read()
        with Download() as downloader:
            folder = temp_dir(lookup)
            retriever = Retrieve(downloader, folder, 'saved_data', folder,
                                 save, use_saved)
            wfp = WFPFood(configuration, token_downloader, retriever)
            countries = wfp.get_countries()
            logger.info('Number of country datasets to upload: %d' %
                        len(countries))
            wfp.build_mappings()
            for info, country in progress_storing_tempdir(
                    lookup, countries, 'iso3'):

                dataset, showcase, qc_indicators = wfp.generate_dataset_and_showcase(
                    country['iso3'], info['folder'])
                if dataset:
                    dataset.update_from_yaml()
                    dataset['notes'] = dataset['notes'] % (
                        'Food Prices data for %s. Food prices data comes from the '
                        'World Food Programme and covers') % country['name']
                    dataset.generate_resource_view(-1,
                                                   indicators=qc_indicators)
                    dataset.create_in_hdx(
                        remove_additional_resources=True,
                        hxl_update=False,
                        updated_by_script='HDX Scraper: WFP Food Prices',
                        batch=info['batch'])
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
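The Retrieve object constructed above wraps the downloader with a save/replay mechanism. Below is a minimal sketch of that pattern, not code from the original repository: the import paths and the meaning of the positional arguments (fallback folder, saved-data folder, temporary folder) are assumptions inferred from the calls in these examples. With save=True downloads are also written to the saved-data folder; with use_saved=True a later run reads them back instead of downloading, which is how the test_run example further down replays files from its fixtures directory.

from hdx.utilities.downloader import Download  # assumed import paths (hdx-python-utilities)
from hdx.utilities.path import temp_dir
from hdx.utilities.retriever import Retrieve

with temp_dir('retrieve-sketch') as folder:  # illustrative folder name
    with Download() as downloader:
        # Recording run: fetch from the network and also write copies to 'saved_data'
        retriever = Retrieve(downloader, folder, 'saved_data', folder,
                             save=True, use_saved=False)
        # Replay run (e.g. in tests): read previously saved files instead of downloading
        # retriever = Retrieve(downloader, folder, 'saved_data', folder,
        #                      save=False, use_saved=True)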
Example #2
    def test_generate_dataset_and_showcase(self, downloader, configuration):
        countriesdata = get_countriesdata('http://xxx', downloader,
                                          self.country_correspondence)
        countrydata = countriesdata[0]
        with temp_dir('wfp-foodprices') as folder:
            dataset, showcase = generate_dataset_and_showcase(
                'http://yyy?ac=', downloader, folder, countrydata, {})

            assert dataset["name"] == "wfp-food-prices-for-afghanistan"
            assert dataset["title"] == "Afghanistan - Food Prices"

            resources = dataset.get_resources()
            assert resources[0] == {
                'format': 'csv',
                'description': 'Food prices data with HXL tags',
                'name': 'Afghanistan - Food Prices',
                'dataset_preview_enabled': 'False',
                'resource_type': 'file.upload',
                'url_type': 'upload'
            }
            assert resources[1]["name"] == 'Afghanistan - Food Median Prices'

            assert showcase["title"] == "Afghanistan - Food Prices showcase"
            assert (
                showcase["url"] ==
                "http://dataviz.vam.wfp.org/economic_explorer/prices?adm0=1"
            )
Example #3
def main(excel_path, gsheet_auth, updatesheets, updatetabs, scrapers, basic_auths, other_auths, nojson,
         countries_override, save, use_saved, **ignore):
    logger.info('##### hdx-scraper-covid-viz version %.1f ####' % VERSION)
    configuration = Configuration.read()
    with temp_dir() as temp_folder:
        with Download(rate_limit={'calls': 1, 'period': 0.1}) as downloader:
            retriever = Retrieve(downloader, temp_folder, 'saved_data', temp_folder, save, use_saved)
            if scrapers:
                logger.info('Updating only scrapers: %s' % scrapers)
            tabs = configuration['tabs']
            if updatetabs is None:
                updatetabs = list(tabs.keys())
                logger.info('Updating all tabs')
            else:
                logger.info('Updating only these tabs: %s' % updatetabs)
            noout = NoOutput(updatetabs)
            if excel_path:
                excelout = ExcelOutput(excel_path, tabs, updatetabs)
            else:
                excelout = noout
            if gsheet_auth:
                gsheets = GoogleSheets(configuration, gsheet_auth, updatesheets, tabs, updatetabs)
            else:
                gsheets = noout
            if nojson:
                jsonout = noout
            else:
                jsonout = JsonOutput(configuration, updatetabs)
            outputs = {'gsheets': gsheets, 'excel': excelout, 'json': jsonout}
            today = datetime.now()
            countries_to_save = get_indicators(configuration, today, retriever, outputs, updatetabs, scrapers,
                                               basic_auths, other_auths, countries_override)
            jsonout.add_additional_json(downloader, today=today)
            jsonout.save(countries_to_save=countries_to_save)
            excelout.save()
Example #4
def main():
    """Generate dataset and create it in HDX"""

    filelist_url = Configuration.read()['filelist_url']
    country_group_url = Configuration.read()['country_group_url']
    dataset_codes = Configuration.read()['dataset_codes']
    showcase_base_url = Configuration.read()['showcase_base_url']
    with temp_dir('faostat') as folder:
        with Download() as downloader:
            indicatortypes = get_indicatortypesdata(filelist_url, downloader)
            countriesdata = get_countriesdata(country_group_url, downloader)
            logger.info('Number of indicator types to upload: %d' %
                        len(dataset_codes))
            for dataset_code in dataset_codes:
                datasets, showcases = generate_datasets_and_showcases(
                    downloader, folder, dataset_codes[dataset_code],
                    indicatortypes[dataset_code], countriesdata,
                    showcase_base_url)
                logger.info('Number of datasets to upload: %d' % len(datasets))
                for i, dataset in enumerate(datasets):
                    logger.info('Creating dataset: %s' % dataset['title'])
                    dataset.preview_off()
                    dataset.create_in_hdx()
                    showcase = showcases[i]
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
Example #5
    def test_generate_dataset_and_showcase(self, configuration, data):
        with temp_dir("ucdp") as folder:
            resources = configuration["resources"]
            fields = configuration["fields"]
            countries, headers, countriesdata, qc_rows = data
            index = [i for i, c in enumerate(countries)
                     if c["iso3"] == "BGD"][0]
            dataset, showcase, bites_disabled = generate_dataset_and_showcase(
                folder,
                countries[index],
                countriesdata["BGD"],
                qc_rows,
                headers,
                resources,
                fields,
            )
            assert dataset["name"] == "unhcr-population-data-for-bgd"
            assert (
                dataset["title"] ==
                "Bangladesh - Data on forcibly displaced populations and stateless persons"
            )

            resources = dataset.get_resources()
            assert len(resources) == 5  # should be 10 if all data is available

            assert showcase["name"] == "unhcr-population-data-for-bgd-showcase"

            assert bites_disabled == [False, True, True]
Example #6
    def test_generate_dataset_and_showcase(self, configuration, downloader, endpoints_metadata):
        with temp_dir('UNESCO') as folder:
            res = generate_dataset_and_showcase(downloader, countrydata, endpoints_metadata, folder=folder)
            dataset, showcase = next(res)
            assert dataset == {'tags': [{'name': 'sustainable development', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                                        {'name': 'demographics', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                                        {'name': 'socioeconomics', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                                        {'name': 'education', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}],
                               'owner_org': '18f2d467-dcf8-4b7e-bffa-b3c338ba3a7c', 'data_update_frequency': '365',
                               'title': 'UNESCO Education: Financial resources - Argentina',
                               'groups': [{'name': 'arg'}], 'maintainer': '196196be-6037-4488-8b71-d786adf4c081',
                               'name': 'unesco-education-financial-resources-argentina', 'dataset_date': '01/01/1970-12/31/2014',
                               'subnational': '0'}
            resources = dataset.get_resources()

            assert resources == [{'description': 'Government expenditure per student', 'format': 'csv', 'name': 'XUNIT', 'resource_type': 'file.upload', 'url_type': 'upload'}]

            assert showcase == {'name': 'unesco-education-financial-resources-argentina-showcase',
                                'notes': 'Education, literacy and other indicators for Argentina',
                                'image_url': 'http://www.tellmaps.com/uis/internal/assets/uisheader-en.png',
                                'url': 'http://uis.unesco.org/en/country/AR',
                                'tags': [{'name': 'sustainable development',
                                          'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                                         {'name': 'demographics',
                                          'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                                         {'name': 'socioeconomics',
                                          'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                                         {'name': 'education', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}],
                                'title': 'UNESCO Education: Financial resources - Argentina'}
Example #7
 def test_run(self, configuration, fixtures_dir):
     with temp_dir("TestIATIViz",
                   delete_on_success=True,
                   delete_on_failure=False) as tempdir:
         with Download(user_agent="test") as downloader:
             retriever = Retrieve(
                 downloader,
                 tempdir,
                 fixtures_dir,
                 tempdir,
                 save=False,
                 use_saved=True,
             )
             today = "2021-05-06"
             start(
                 configuration,
                 today,
                 retriever,
                 tempdir,
                 dportal_params=None,
                 whattorun="covid",
                 filterdate="2020-01",
             )
             for filename in ("flows", "transactions", "reporting_orgs"):
                 csv_filename = f"{filename}.csv"
                 expected_file = join(fixtures_dir, csv_filename)
                 actual_file = join(tempdir, csv_filename)
                 assert_files_same(expected_file, actual_file)
                 json_filename = f"{filename}.json"
                 expected_file = join(fixtures_dir, json_filename)
                 actual_file = join(tempdir, json_filename)
                 assert filecmp.cmp(expected_file, actual_file)
Example #8
 def test_get_indicatortypes(self, configuration, downloader,
                             mock_urlretrieve):
     with temp_dir("faostat-test") as folder:
         indicatortypesdata = download_indicatorsets(
             configuration["filelist_url"],
             configuration["indicatorsetnames"],
             downloader,
             folder,
         )
         assert indicatortypesdata == TestFaostat.indicatorsets
Example #9
 def test_get_indicators(self, configuration, folder):
     with temp_dir('TestCovidViz') as tempdir:
         with Download(user_agent='test') as downloader:
             tabs = configuration['tabs']
             noout = nooutput(tabs)
             jsonout = jsonoutput(configuration, tabs)
             outputs = {'gsheets': noout, 'excel': noout, 'json': jsonout}
             get_indicators(configuration, downloader, outputs, tabs, scrapers=['ifi', 'who', 'covid_trend'])
             filepath = jsonout.save(tempdir)
             assert_files_same(filepath, join(folder, 'test_tabular.json'))
Example #10
    def test_generate_dataset_and_showcase(self, configuration, downloader):
        with temp_dir('faostat') as folder:
            datasets, showcases = generate_datasets_and_showcases(
                downloader, folder, 'Food Security',
                TestFaostat.indicatortypedata, TestFaostat.countrydata,
                'http://zzz/')
            assert datasets[0] == {
                'maintainer': '196196be-6037-4488-8b71-d786adf4c081',
                'owner_org': 'ed727a5b-3e6e-4cd6-b97e-4a71532085e6',
                'data_update_frequency': '365',
                'subnational': '0',
                'tags': [{
                    'name': 'hxl'
                }, {
                    'name': 'food security'
                }],
                'name': 'faostat-afghanistan-indicators-for-food-security',
                'title': 'Afghanistan - Food Security Indicators',
                'license_id': 'cc-by-igo',
                'notes':
                'FAO statistics collates and disseminates food and agricultural statistics globally. The division develops methodologies and standards for data collection, and holds regular meetings and workshops to support member countries develop statistical systems. We produce publications, working papers and statistical yearbooks that cover food security, prices, production and trade and agri-environmental statistics.',
                'caveats':
                'Reliability and accuracy depend on the sampling design and size of the basic variables and these might differ significantly between countries just as the use of data sources, definitions and methods. The accuracy of an indicator is very much dependent on the accuracy of the basic variables that make up the indicator.',
                'methodology': 'Registry',
                'dataset_source': 'FAOSTAT',
                'package_creator': 'mcarans',
                'private': False,
                'groups': [{
                    'name': 'afg'
                }],
                'dataset_date': '01/01/1999-12/31/2014'
            }

            resources = datasets[0].get_resources()
            assert resources == [{
                'name': 'Afghanistan - Food Security Indicators',
                'description': '',
                'format': 'csv'
            }]
            assert showcases[0] == {
                'name':
                'faostat-afghanistan-indicators-for-food-security-showcase',
                'title': 'Afghanistan - Food Security Indicators',
                'notes':
                'FAO statistics collates and disseminates food and agricultural statistics globally. The division develops methodologies and standards for data collection, and holds regular meetings and workshops to support member countries develop statistical systems. We produce publications, working papers and statistical yearbooks that cover food security, prices, production and trade and agri-environmental statistics.',
                'url': 'http://zzz/2',
                'image_url':
                'http://www.fao.org/uploads/pics/food-agriculture.png',
                'tags': [{
                    'name': 'hxl'
                }, {
                    'name': 'food security'
                }]
            }
Example #11
 def test_load_file_to_str(self):
     with temp_dir(folder="test_text") as tmpdir:
         text_file = join(tmpdir, "text_file.txt")
         save_str_to_file(TestLoader.text, text_file)
         result = load_file_to_str(text_file)
         assert result == TestLoader.text
         result = load_file_to_str(text_file, strip=True)
         assert result == TestLoader.expected_text_strip
         result = load_file_to_str(text_file, replace_newlines=" ")
         assert result == TestLoader.expected_text_newlines_to_spaces
         with pytest.raises(IOError):
             load_file_to_str(join(tmpdir, "NOTEXIST.txt"))
Example #12
def read_ole(downloader, datasetinfo, **kwargs):
    url = get_url(datasetinfo['url'], **kwargs)
    with temp_dir('ole') as folder:
        path = downloader.download_file(url, folder, 'olefile')
        ole = olefile.OleFileIO(path)
        data = ole.openstream('Workbook').getvalue()
        outputfile = join(folder, 'excel_file.xls')
        with open(outputfile, 'wb') as f:
            f.write(data)
        datasetinfo['url'] = outputfile
        datasetinfo['format'] = 'xls'
        return read_tabular(downloader, datasetinfo, **kwargs)
Example #13
 def test_get_indicators(self, configuration, folder):
     with temp_dir('TestCovidViz', delete_on_success=True, delete_on_failure=False) as tempdir:
         with Download(user_agent='test') as downloader:
             tabs = configuration['tabs']
             noout = nooutput(tabs)
             jsonout = jsonoutput(configuration, tabs)
             outputs = {'gsheets': noout, 'excel': noout, 'json': jsonout}
             admininfo = AdminInfo.setup(downloader)
             get_indicators(configuration, downloader, admininfo, outputs, tabs, scrapers=['ifi', 'who_global', 'who_national', 'who_subnational', 'who_covid', 'sadd', 'covidtests'])
             filepaths = jsonout.save(tempdir, hrp_iso3s=admininfo.hrp_iso3s)
             assert filecmp.cmp(filepaths[0], join(folder, 'test_tabular.json'))
             assert filecmp.cmp(filepaths[1], join(folder, 'test_tabular_covidseries.json'))
Example #14
    def test_generate_dataset_and_showcase(self, configuration, data):
        with temp_dir("ucdp") as folder:
            countries, headers, countriesdata = data
            index = [i for i, c in enumerate(countries) if c["iso3"] == "BGD"][0]
            dataset, showcase = generate_dataset_and_showcase(
                folder, countries[index], countriesdata["BGD"], headers
            )
            assert dataset["name"] == "unhcr-population-data-for-bangladesh"
            assert dataset["title"] == "Bangladesh - Data on UNHCR population"

            resources = dataset.get_resources()
            assert len(resources) == 4  # should be 10 if all data is available

            assert showcase["name"] == "unhcr-population-data-for-bangladesh-showcase"
Example #15
    def test_generate_dataset_and_showcase(self, configuration, downloader):
        with temp_dir('ACLED') as folder:
            dataset, showcase = generate_dataset_and_showcase(
                'http://lala?', downloader, folder, TestAcled.countrydata)
            assert dataset == TestAcled.dataset

            resources = dataset.get_resources()
            assert resources == TestAcled.resources

            assert showcase == {
                'name':
                'acled-data-for-cameroon-showcase',
                'notes':
                'Conflict Data Dashboard for Cameroon',
                'url':
                'https://www.acleddata.com/dashboard/#120',
                'tags': [{
                    'name':
                    'hxl',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'violence and conflict',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'protests',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'security incidents',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }],
                'title':
                'Dashboard for Cameroon',
                'image_url':
                'https://www.acleddata.com/wp-content/uploads/2018/01/dash.png'
            }

            dataset, showcase = generate_dataset_and_showcase(
                'http://lala?', downloader, folder, {
                    'm49': 4,
                    'iso3': 'AFG',
                    'countryname': 'Afghanistan'
                })
            assert dataset is None
Example #16
    def test_generate_dataset_and_showcase(self, configuration, data):
        with temp_dir("ucdp") as folder:
            countries, headers, countriesdata = data
            dataset, showcase = generate_dataset_and_showcase(
                folder, countries[1], countriesdata["BGD"], headers)
            assert dataset == TestUCDP.dataset

            resources = dataset.get_resources()
            assert resources == TestUCDP.resources
            file = "conflict_data_BGD.csv"
            assert_files_same(join("tests", "fixtures", file),
                              join(folder, file))
            file = "qc_conflict_data_BGD.csv"
            assert_files_same(join("tests", "fixtures", file),
                              join(folder, file))

            assert showcase == {
                "name":
                "ucdp-data-for-bangladesh-showcase",
                "title":
                "Bangladesh - Data on Conflict Events",
                "notes":
                "Conflict Data Dashboard for Bangladesh",
                "url":
                "https://ucdp.uu.se/#country/771",
                "image_url":
                "https://pbs.twimg.com/profile_images/832251660718178304/y-LWa5iK_200x200.jpg",
                "tags": [
                    {
                        "name": "hxl",
                        "vocabulary_id":
                        "4e61d464-4943-4e97-973a-84673c1aaa87",
                    },
                    {
                        "name": "violence and conflict",
                        "vocabulary_id":
                        "4e61d464-4943-4e97-973a-84673c1aaa87",
                    },
                    {
                        "name": "protests",
                        "vocabulary_id":
                        "4e61d464-4943-4e97-973a-84673c1aaa87",
                    },
                    {
                        "name": "security incidents",
                        "vocabulary_id":
                        "4e61d464-4943-4e97-973a-84673c1aaa87",
                    },
                ],
            }
Example #17
    def test_generate_dataset_and_showcase(self, configuration, data):
        with temp_dir('ucdp') as folder:
            countries, headers, countriesdata = data
            dataset, showcase = generate_dataset_and_showcase(
                folder, countries[1], countriesdata['BGD'], headers)
            assert dataset == TestUCDP.dataset

            resources = dataset.get_resources()
            assert resources == TestUCDP.resources
            file = 'conflict_data_BGD.csv'
            assert_files_same(join('tests', 'fixtures', file),
                              join(folder, file))
            file = 'qc_conflict_data_BGD.csv'
            assert_files_same(join('tests', 'fixtures', file),
                              join(folder, file))

            assert showcase == {
                'name':
                'ucdp-data-for-bangladesh-showcase',
                'title':
                'Bangladesh - Conflict Data',
                'notes':
                'Conflict Data Dashboard for Bangladesh',
                'url':
                'https://ucdp.uu.se/#country/771',
                'image_url':
                'https://pbs.twimg.com/profile_images/832251660718178304/y-LWa5iK_200x200.jpg',
                'tags': [{
                    'name':
                    'hxl',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'violence and conflict',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'protests',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'security incidents',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }]
            }
Example #18
    def test_generate_dataset_and_showcase(self, configuration, downloader):
        showcase_url = 'https://vam.wfp.org/CountryPage_assessments.aspx?iso3=%s'
        variables = {'rCSI': 'reduced coping strategy'}
        with temp_dir('wfp-foodsecurity') as folder:
            dataset, showcase, bites_disabled = generate_dataset_and_showcase('http://yyy', showcase_url, downloader, folder, TestScraperName.countrydata, variables)
            assert dataset == {'name': 'wfp-food-security-indicators-for-guinea', 'title': 'Guinea - Food Security Indicators',
                               'maintainer': 'eda0ee04-7436-47f0-87ab-d1b9edcd3bb9', 'owner_org': '3ecac442-7fed-448d-8f78-b385ef6f84e7',
                               'data_update_frequency': '30', 'subnational': '0', 'groups': [{'name': 'gin'}],
                               'tags': [{'name': 'hxl', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}, {'name': 'food security', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}, {'name': 'indicators', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}], 'dataset_date': '06/01/2015'}
            resources = dataset.get_resources()
            assert resources == [{'name': 'pblStatsSum', 'description': 'pblStatsSum: Guinea - Food Security Indicators', 'format': 'csv', 'resource_type': 'file.upload', 'url_type': 'upload'},
                                 {'name': 'QuickCharts-pblStatsSum', 'description': 'Cut down data for QuickCharts', 'format': 'csv', 'resource_type': 'file.upload', 'url_type': 'upload'}]

            assert showcase == {'name': 'wfp-food-security-indicators-for-guinea-showcase', 'title': 'Guinea - Food Security Indicators', 'notes': 'Reports on food security for Guinea', 'url': 'https://vam.wfp.org/CountryPage_assessments.aspx?iso3=GIN', 'image_url': 'https://media.licdn.com/media/gcrc/dms/image/C5612AQHtvuWFVnGKAA/article-cover_image-shrink_423_752/0?e=2129500800&v=beta&t=00XnoAp85WXIxpygKvG7eGir_LqfxzXZz5lRGRrLUZw', 'tags': [{'name': 'hxl', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}, {'name': 'food security', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}, {'name': 'indicators', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}]}
            assert bites_disabled == [True, False, True]
Example #19
    def test_jsonoutput(self, configuration, fixtures, hxltags):
        with temp_dir('TestScraperJson', delete_on_success=True, delete_on_failure=False) as tempdir:
            with Download(user_agent='test') as downloader:
                tabs = configuration['tabs']
                noout = NoOutput(tabs)
                jsonout = JsonOutput(configuration, tabs)
                rows = [{'Country Code': '#country+code', 'Country Name': '#country+name', 'Population': '#population'}, 
                        {'Country Code': 'AFG', 'Country Name': 'Afghanistan', 'Population': 38041754}]
                noout.add_data_rows_by_key('test', 'AFG', rows, hxltags)
                jsonout.add_data_rows_by_key('test', 'AFG', rows, hxltags)
                assert jsonout.json == {'test_data': {'AFG': [{'#country+code': '#country+code', '#country+name': '#country+name', '#population': '#population'},
                                                              {'#country+code': 'AFG', '#country+name': 'Afghanistan', '#population': 38041754}]}}
                df = pandas.DataFrame.from_records([rows[1]])
                jsonout.json = dict()
                noout.add_dataframe_rows('test', df, rows[0])
                jsonout.add_dataframe_rows('test', df, rows[0])
                assert jsonout.json == {'test_data': [{'#country+code': 'AFG', '#country+name': 'Afghanistan', '#population': 38041754}]}

                noout.add_data_row('test', rows[1])  # doesn't do anything
Example #20
    def test_generate_datasets_and_showcase(self, configuration, downloader):
        with temp_dir('DHS') as folder:
            dataset, subdataset, showcase, bites_disabled = \
                generate_datasets_and_showcase(configuration, 'http://haha/', downloader, folder, TestDHS.country, TestDHS.tags)
            assert dataset == TestDHS.dataset
            resources = dataset.get_resources()
            assert resources == TestDHS.resources
            assert subdataset == TestDHS.subdataset
            assert subdataset.get_resources() == TestDHS.subresources

            assert showcase == {'name': 'dhs-data-for-afghanistan-showcase', 'title': 'Final Report', 'notes': 'Afghanistan Demographic and Health Survey 2015',
                                'url': 'https://www.dhsprogram.com/pubs/pdf/FR323/FR323.pdf', 'image_url': 'https://www.dhsprogram.com/publications/images/thumbnails/FR323.jpg',
                                'tags': [{'name': 'hxl', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}, {'name': 'health', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}, {'name': 'demographics', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}]}
            assert bites_disabled == {'national': [False, False, False], 'subnational': [False, False, False]}
            file = 'DHS Quickstats_national_AFG.csv'
            assert_files_same(join('tests', 'fixtures', file), join(folder, file))
            file = 'DHS Mobile_national_AFG.csv'
            assert_files_same(join('tests', 'fixtures', file), join(folder, file))
            file = 'DHS Quickstats_subnational_AFG.csv'
            assert_files_same(join('tests', 'fixtures', file), join(folder, file))
Example #21
 def test_get_indicators(self, configuration, folder):
     configuration = Configuration.read()
     countries = configuration['countries']
     palestine_country_code, _ = Country.get_iso3_country_code_fuzzy(
         'Palestine')
     df_indicators, df_timeseries, df_cumulative = get_indicators(
         folder, countries, palestine_country_code, True)
     with temp_dir('PA-COVID-TEST') as outdir:
         ifilename = 'indicators.csv'
         ioutputfile = join(outdir, ifilename)
         df_indicators.to_csv(ioutputfile)
         tfilename = 'timeseries.csv'
         toutputfile = join(outdir, tfilename)
         df_timeseries.to_csv(toutputfile)
         cfilename = 'cumulative.csv'
         coutputfile = join(outdir, cfilename)
         df_cumulative.to_csv(coutputfile)
         assert_files_same(join(folder, ifilename), ioutputfile)
         assert_files_same(join(folder, tfilename), toutputfile)
         assert_files_same(join(folder, cfilename), coutputfile)
Example #22
def main():
    """Generate dataset and create it in HDX"""

    base_url = Configuration.read()['base_url']
    with temp_dir('UNESCO') as folder:
        with Download(extra_params_yaml=join(expanduser('~'),
                                             '.extraparams.yml'),
                      extra_params_lookup=lookup) as downloader:
            endpoints = Configuration.read()['endpoints']
            endpoints_metadata = get_endpoints_metadata(
                base_url, downloader, endpoints)
            countriesdata = get_countriesdata(base_url, downloader)

            logger.info('Number of datasets to upload: %d' %
                        len(countriesdata))

            for countrydata in countriesdata:
                for dataset, showcase in generate_dataset_and_showcase(
                        downloader,
                        countrydata,
                        endpoints_metadata,
                        folder=folder,
                        merge_resources=True,
                        single_dataset=False):  # TODO: fix folder
                    if dataset:
                        dataset.update_from_yaml()
                        start = default_timer()
                        dataset.create_in_hdx(remove_additional_resources=True,
                                              hxl_update=False)
                        print("total time = %d" % (default_timer() - start))
                        resources = dataset.get_resources()
                        resource_ids = [
                            x['id'] for x in sorted(resources,
                                                    key=lambda x: x['name'],
                                                    reverse=False)
                        ]
                        dataset.reorder_resources(resource_ids,
                                                  hxl_update=False)
                        showcase.create_in_hdx()
                        showcase.add_dataset(dataset)
Example #23
 def test_get_indicators(self, configuration, folder):
     with temp_dir('TestCovidViz',
                   delete_on_success=True,
                   delete_on_failure=False) as tempdir:
         with Download(user_agent='test') as downloader:
             retriever = Retrieve(downloader,
                                  tempdir,
                                  folder,
                                  tempdir,
                                  save=False,
                                  use_saved=True)
             tabs = configuration['tabs']
             noout = NoOutput(tabs)
             jsonout = JsonOutput(configuration, tabs)
             outputs = {'gsheets': noout, 'excel': noout, 'json': jsonout}
             today = parse_date('2021-05-03')
             countries_to_save = get_indicators(
                 configuration,
                 today,
                 retriever,
                 outputs,
                 tabs,
                 scrapers=[
                     'ifi', 'who_global', 'who_national', 'who_subnational',
                     'who_covid', 'sadd', 'covidtests', 'cadre_harmonise',
                     'access', 'food_prices'
                 ],
                 use_live=False)
             filepaths = jsonout.save(tempdir,
                                      countries_to_save=countries_to_save)
             assert filecmp.cmp(filepaths[0],
                                join(folder, 'test_scraper_all.json'))
             assert filecmp.cmp(filepaths[1],
                                join(folder, 'test_scraper.json'))
             assert filecmp.cmp(filepaths[2],
                                join(folder, 'test_scraper_daily.json'))
             assert filecmp.cmp(
                 filepaths[3], join(folder,
                                    'test_scraper_covidseries.json'))
Example #24
def read_ole(downloader, datasetinfo, **kwargs):
    # type: (Download, Dict, Any) -> Tuple[List[str],Iterator[Union[List,Dict]]]
    """Read data from OLE Excel source

    Args:
        downloader (Download): Download object for downloading files
        datasetinfo (Dict): Dictionary of information about dataset
        **kwargs: Variables to use when evaluating template arguments

    Returns:
        Tuple[List[str],Iterator[Union[List,Dict]]]: Tuple (headers, iterator where each row is a list or dictionary)
    """
    url = get_url(datasetinfo['url'], **kwargs)
    with temp_dir('ole') as folder:
        path = downloader.download_file(url, folder, 'olefile')
        ole = olefile.OleFileIO(path)
        data = ole.openstream('Workbook').getvalue()
        outputfile = join(folder, 'excel_file.xls')
        with open(outputfile, 'wb') as f:
            f.write(data)
        datasetinfo['url'] = outputfile
        datasetinfo['format'] = 'xls'
        return read_tabular(downloader, datasetinfo, **kwargs)
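A hypothetical invocation of the read_ole helper above; the URL and the extra dictionary keys are illustrative placeholders, not taken from the original project. The helper downloads a legacy OLE2 workbook into a temporary folder, extracts its 'Workbook' stream to a plain .xls file and delegates parsing to read_tabular, so the caller simply iterates the returned rows.

from hdx.utilities.downloader import Download  # assumed import path

datasetinfo = {
    'url': 'https://example.org/legacy_workbook.xls',  # placeholder URL
    'format': 'ole',
    # any further keys expected by read_tabular (sheet, headers, ...) would go here
}
with Download() as downloader:
    headers, iterator = read_ole(downloader, datasetinfo)
    for row in iterator:
        pass  # each row is a list or dict, per the docstring above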
Example #25
def main():
    """Generate dataset and create it in HDX"""

    with temp_dir('wfp-foodprices') as folder:
        with Download() as downloader:
            config = Configuration.read()

            countries_url = config['countries_url']
            wfpfood_url = config['wfpfood_url']
            country_correspondence = config['country_correspondence']
            shortcuts = config['shortcuts']

            countriesdata = get_countriesdata(countries_url, downloader,
                                              country_correspondence)
            logger.info('Number of datasets to upload: %d' %
                        len(countriesdata))

            for countrydata in countriesdata:
                dataset, showcase = generate_dataset_and_showcase(
                    wfpfood_url, downloader, folder, countrydata, shortcuts)
                if dataset:
                    dataset.update_from_yaml()
                    dataset['notes'] = dataset['notes'] % (
                        'Food Prices data for %s. Food prices data comes from the '
                        'World Food Programme and covers') % countrydata['name']
                    dataset.create_in_hdx()
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
                    resource_view = generate_resource_view(dataset)
                    resource_view.create_in_hdx()

            logger.info('Individual country datasets finished.')

            generate_joint_dataset_and_showcase(wfpfood_url, downloader,
                                                folder, countriesdata)

    logger.info('Done')
Example #26
    def test_generate_datasets_and_showcase(self, configuration, downloader):
        with temp_dir('idmc') as folder:
            # indicator dataset test
            indicators = Configuration.read()['indicators']
            tags = Configuration.read()['tags']
            datasets, showcase, headersdata, countriesdata = generate_indicator_datasets_and_showcase(
                downloader, folder, indicators, tags)
            assert datasets == {
                'displacement_data': {
                    'name':
                    'idmc-internally-displaced-persons-idps',
                    'title':
                    'Internally displaced persons - IDPs',
                    'maintainer':
                    '196196be-6037-4488-8b71-d786adf4c081',
                    'owner_org':
                    '647d9d8c-4cac-4c33-b639-649aad1c2893',
                    'data_update_frequency':
                    '365',
                    'subnational':
                    '0',
                    'tags': [{
                        'name':
                        'hxl',
                        'vocabulary_id':
                        '4e61d464-4943-4e97-973a-84673c1aaa87'
                    }, {
                        'name':
                        'displacement',
                        'vocabulary_id':
                        '4e61d464-4943-4e97-973a-84673c1aaa87'
                    }, {
                        'name':
                        'internally displaced persons - idp',
                        'vocabulary_id':
                        '4e61d464-4943-4e97-973a-84673c1aaa87'
                    }, {
                        'name':
                        'violence and conflict',
                        'vocabulary_id':
                        '4e61d464-4943-4e97-973a-84673c1aaa87'
                    }],
                    'notes':
                    "Description\n\nContains data from IDMC's [Global Internal Displacement Database](http://www.internal-displacement.org/database/displacement-data).",
                    'methodology_other':
                    'Methodology',
                    'caveats':
                    'Caveats',
                    'groups': [{
                        'name': 'world'
                    }],
                    'dataset_date':
                    '01/01/2008-12/31/2018'
                },
                'disaster_data': {
                    'name':
                    'idmc-internally-displaced-persons-idps-new-displacement-associated-with-disasters',
                    'title':
                    'Internally displaced persons - IDPs (new displacement associated with disasters)',
                    'maintainer':
                    '196196be-6037-4488-8b71-d786adf4c081',
                    'owner_org':
                    '647d9d8c-4cac-4c33-b639-649aad1c2893',
                    'data_update_frequency':
                    '365',
                    'subnational':
                    '0',
                    'tags': [{
                        'name':
                        'hxl',
                        'vocabulary_id':
                        '4e61d464-4943-4e97-973a-84673c1aaa87'
                    }, {
                        'name':
                        'displacement',
                        'vocabulary_id':
                        '4e61d464-4943-4e97-973a-84673c1aaa87'
                    }, {
                        'name':
                        'internally displaced persons - idp',
                        'vocabulary_id':
                        '4e61d464-4943-4e97-973a-84673c1aaa87'
                    }, {
                        'name':
                        'violence and conflict',
                        'vocabulary_id':
                        '4e61d464-4943-4e97-973a-84673c1aaa87'
                    }],
                    'notes':
                    "Description\n\nContains data from IDMC's [Global Internal Displacement Database](http://www.internal-displacement.org/database/displacement-data).",
                    'methodology_other':
                    'Methodology',
                    'caveats':
                    'Caveats',
                    'groups': [{
                        'name': 'world'
                    }],
                    'dataset_date':
                    '01/01/2008-12/31/2018'
                }
            }
            resources = datasets['displacement_data'].get_resources()
            assert resources == [{
                'description': 'Internally displaced persons - IDPs',
                'format': 'csv',
                'name': 'displacement_data',
                'resource_type': 'file.upload',
                'url_type': 'upload'
            }]
            resource_name = '%s.%s' % (resources[0]['name'],
                                       resources[0]['format'])
            expected_file = join('tests', 'fixtures', resource_name)
            actual_file = join(folder, resource_name)
            assert_files_same(expected_file, actual_file)

            resources = datasets['disaster_data'].get_resources()
            resource_name = '%s.%s' % (resources[0]['name'],
                                       resources[0]['format'])
            expected_file = join('tests', 'fixtures', resource_name)
            actual_file = join(folder, resource_name)
            assert_files_same(expected_file, actual_file)

            assert resources == [{
                'description':
                'Internally displaced persons - IDPs (new displacement associated with disasters)',
                'format': 'csv',
                'name': 'disaster_data',
                'resource_type': 'file.upload',
                'url_type': 'upload'
            }]
            assert showcase == {
                'image_url':
                'http://www.internal-displacement.org/global-report/grid2018/img/ogimage.jpg',
                'name':
                'idmc-global-report-on-internal-displacement',
                'notes':
                'Click the image on the right to go to the IDMC Global Report on Internal Displacement',
                'tags': [{
                    'name':
                    'hxl',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'displacement',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'internally displaced persons - idp',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'violence and conflict',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }],
                'title':
                'IDMC Global Report on Internal Displacement',
                'url':
                'http://www.internal-displacement.org/global-report/grid2018/'
            }
            #  country datasets tests
            dataset, showcase, disables_bites = generate_country_dataset_and_showcase(
                downloader, folder, headersdata, 'AFG', countriesdata['AFG'],
                datasets, tags)
            assert dataset == TestIDMC.afg_dataset
            resources = dataset.get_resources()
            assert resources == [{
                'description':
                'Internally displaced persons - IDPs for Afghanistan',
                'format': 'csv',
                'name': 'displacement_data',
                'resource_type': 'file.upload',
                'url_type': 'upload'
            }, {
                'description':
                'Internally displaced persons - IDPs (new displacement associated with disasters) for Afghanistan',
                'format': 'csv',
                'name': 'disaster_data',
                'resource_type': 'file.upload',
                'url_type': 'upload'
            }]
            resource_name = '%s.%s' % (resources[0]['name'],
                                       resources[0]['format'])
            expected_file = join('tests', 'fixtures', resource_name)
            actual_file = join(folder, resource_name)
            assert_files_same(expected_file, actual_file)
            resource_name = '%s.%s' % (resources[1]['name'],
                                       resources[1]['format'])
            expected_file = join('tests', 'fixtures', resource_name)
            actual_file = join(folder, resource_name)
            assert_files_same(expected_file, actual_file)

            assert showcase == {
                'name':
                'idmc-idp-data-for-afghanistan-showcase',
                'title':
                'IDMC Afghanistan Summary Page',
                'notes':
                'Click the image on the right to go to the IDMC summary page for the Afghanistan dataset',
                'url':
                'http://www.internal-displacement.org/countries/Afghanistan/',
                'image_url':
                'http://www.internal-displacement.org/sites/default/files/logo_0.png',
                'tags': [{
                    'name':
                    'hxl',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'displacement',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'internally displaced persons - idp',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'violence and conflict',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }]
            }
            assert disables_bites == [False, False, False]

            dataset, showcase, disables_bites = generate_country_dataset_and_showcase(
                downloader, folder, headersdata, 'TZA', countriesdata['TZA'],
                datasets, tags)
            assert dataset == {
                'name':
                'idmc-idp-data-for-united-republic-of-tanzania',
                'title':
                'United Republic of Tanzania - Internally displaced persons - IDPs',
                'maintainer':
                '196196be-6037-4488-8b71-d786adf4c081',
                'owner_org':
                '647d9d8c-4cac-4c33-b639-649aad1c2893',
                'data_update_frequency':
                '365',
                'subnational':
                '0',
                'tags': [{
                    'name':
                    'hxl',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'displacement',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'internally displaced persons - idp',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'violence and conflict',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }],
                'groups': [{
                    'name': 'tza'
                }],
                'notes':
                "Description\n\nContains data from IDMC's [Global Internal Displacement Database](http://www.internal-displacement.org/database/displacement-data).",
                'methodology_other':
                '',
                'caveats':
                '',
                'dataset_date':
                '01/01/2011-12/31/2012'
            }
            resources = dataset.get_resources()
            assert resources == [{
                'description':
                'Internally displaced persons - IDPs for United Republic of Tanzania',
                'format': 'csv',
                'name': 'displacement_data',
                'resource_type': 'file.upload',
                'url_type': 'upload'
            }, {
                'description':
                'Internally displaced persons - IDPs (new displacement associated with disasters) for United Republic of Tanzania',
                'format': 'csv',
                'name': 'disaster_data',
                'resource_type': 'file.upload',
                'url_type': 'upload'
            }]
            resource_name = '%s.%s' % (resources[0]['name'],
                                       resources[0]['format'])
            expected_file = join('tests', 'fixtures', resource_name)
            actual_file = join(folder, resource_name)
            assert_files_same(expected_file, actual_file)
            resource_name = '%s.%s' % (resources[1]['name'],
                                       resources[1]['format'])
            expected_file = join('tests', 'fixtures', resource_name)
            actual_file = join(folder, resource_name)
            assert_files_same(expected_file, actual_file)

            assert showcase == {
                'name':
                'idmc-idp-data-for-united-republic-of-tanzania-showcase',
                'title':
                'IDMC United Republic of Tanzania Summary Page',
                'notes':
                'Click the image on the right to go to the IDMC summary page for the United Republic of Tanzania dataset',
                'url':
                'http://www.internal-displacement.org/countries/Tanzania/',
                'image_url':
                'http://www.internal-displacement.org/sites/default/files/logo_0.png',
                'tags': [{
                    'name':
                    'hxl',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'displacement',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'internally displaced persons - idp',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }, {
                    'name':
                    'violence and conflict',
                    'vocabulary_id':
                    '4e61d464-4943-4e97-973a-84673c1aaa87'
                }]
            }
            assert disables_bites == [True, True, False]

            dataset, showcase, disables_bites = generate_country_dataset_and_showcase(
                downloader, folder, headersdata, 'AB9', countriesdata['AB9'],
                datasets, tags)
            assert dataset is None
            assert showcase is None
            assert disables_bites is None
Example #27
    def test_save(self, configuration, fixtures, hxltags):
        with temp_dir('TestScraperSave', delete_on_success=True, delete_on_failure=False) as tempdir:
            with Download(user_agent='test') as downloader:
                tabs = configuration['tabs']
                sheetname = list(tabs.values())[0]
                noout = NoOutput(tabs)
                excelout = ExcelOutput(join(tempdir, 'test_output.xlsx'), tabs, tabs)
                gsheet_auth = getenv('GSHEET_AUTH')
                if not gsheet_auth:
                    raise ValueError('No gsheet authorisation supplied!')
                googleout = GoogleSheets(configuration, gsheet_auth, None, tabs, tabs)
                jsonout = JsonOutput(configuration, tabs)
                output = [list(hxltags.keys()), list(hxltags.values()), ['AFG', 'Afghanistan', 38041754]]

                # won't do anything as wrong tab name
                excelout.update_tab('lala', output, hxltags=hxltags)
                googleout.update_tab('lala', output, hxltags=hxltags)
                jsonout.update_tab('lala', output, hxltags=hxltags)

                noout.update_tab('national', output, hxltags=hxltags)
                excelout.update_tab('national', output, hxltags=hxltags)
                googleout.update_tab('national', output, hxltags=hxltags)
                jsonout.update_tab('national', output, hxltags=hxltags)
                noout.add_additional_json(downloader, today=parse_date('2020-10-01'))
                jsonout.add_additional_json(downloader, today=parse_date('2020-10-01'))
                noout.save()
                excelout.save()
                filepaths = jsonout.save(tempdir, countries_to_save=['AFG'])
                excelsheet = excelout.workbook.get_sheet_by_name(sheetname)

                def get_list_from_cells(cells):
                    result = [list(), list(), list()]
                    for i, row in enumerate(excelsheet[cells]):
                        for column in row:
                            result[i].append(column.value)
                    return result

                assert get_list_from_cells('A1:C3') == output
                spreadsheet = googleout.gc.open_by_url(configuration['googlesheets']['test'])
                googletab = spreadsheet.worksheet_by_title(sheetname)
                result = googletab.get_values(start=(1, 1), end=(3, 3), returnas='matrix')
                result[2][2] = int(result[2][2])
                assert result == output
                assert filecmp.cmp(filepaths[0], join(fixtures, 'test_scraper_all.json'))
                assert filecmp.cmp(filepaths[1], join(fixtures, 'test_scraper_population.json'))
                assert filecmp.cmp(filepaths[2], join(fixtures, 'test_scraper_population.json'))
                assert filecmp.cmp(filepaths[3], join(fixtures, 'test_scraper_other.json'))

                jsonout.json = dict()
                df = pandas.DataFrame(output[2:], columns=output[0])
                noout.update_tab('national', df, hxltags=hxltags)
                excelout.update_tab('national', df, hxltags=hxltags)
                googleout.update_tab('national', df, hxltags=hxltags)
                jsonout.update_tab('national', df, hxltags=hxltags)
                jsonout.add_additional_json(downloader, today=parse_date('2020-10-01'))
                filepaths = jsonout.save(tempdir, countries_to_save=['AFG'])
                assert get_list_from_cells('A1:C3') == output
                result = googletab.get_values(start=(1, 1), end=(3, 3), returnas='matrix')
                result[2][2] = int(result[2][2])
                assert result == output
                assert filecmp.cmp(filepaths[0], join(fixtures, 'test_scraper_all.json'))
                assert filecmp.cmp(filepaths[1], join(fixtures, 'test_scraper_population.json'))
                assert filecmp.cmp(filepaths[2], join(fixtures, 'test_scraper_population.json'))
                assert filecmp.cmp(filepaths[3], join(fixtures, 'test_scraper_other.json'))

                df = pandas.DataFrame(output[1:], columns=output[0])
                googleout.update_tab('national', df, limit=2)
                result = googletab.get_values(start=(1, 1), end=(3, 3), returnas='matrix')
                result[2][2] = int(result[2][2])
Example #28
    def test_temp_dir(self, monkeypatch, mytestdir):
        monkeypatch.setenv("TEMP_DIR", mytestdir)
        with temp_dir() as tempdir:
            assert tempdir == mytestdir
        monkeypatch.delenv("TEMP_DIR")

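        # By default temp_dir creates the folder under the system temp directory and deletes it on exit, with or without an exception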
        tempfolder = "papa"
        expected_dir = join(gettempdir(), tempfolder)

        with temp_dir(tempfolder) as tempdir:
            assert tempdir == expected_dir
        assert exists(tempdir) is False
        try:
            with temp_dir(tempfolder) as tempdir:
                assert tempdir == expected_dir
                raise ValueError("Fail!")
        except ValueError:
            pass
        assert exists(tempdir) is False

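        # Explicit delete_on_success=True, delete_on_failure=True: the folder is removed in both the clean and the failing case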
        with temp_dir(tempfolder,
                      delete_on_success=True,
                      delete_on_failure=True) as tempdir:
            assert tempdir == expected_dir
        assert exists(tempdir) is False
        try:
            with temp_dir(tempfolder,
                          delete_on_success=True,
                          delete_on_failure=True) as tempdir:
                assert tempdir == expected_dir
                raise ValueError("Fail!")
        except ValueError:
            pass
        assert exists(tempdir) is False

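        # delete_on_success=False, delete_on_failure=False: the folder is kept whether or not an exception is raised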
        with temp_dir(tempfolder,
                      delete_on_success=False,
                      delete_on_failure=False) as tempdir:
            assert tempdir == expected_dir
        assert exists(tempdir) is True
        rmtree(tempdir)
        try:
            with temp_dir(tempfolder,
                          delete_on_success=False,
                          delete_on_failure=False) as tempdir:
                assert tempdir == expected_dir
                raise ValueError("Fail!")
        except ValueError:
            pass
        assert exists(tempdir) is True

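        # delete_on_success=True, delete_on_failure=False: removed on a clean exit, kept when an exception is raised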
        with temp_dir(tempfolder,
                      delete_on_success=True,
                      delete_on_failure=False) as tempdir:
            assert tempdir == expected_dir
        assert exists(tempdir) is False
        try:
            with temp_dir(tempfolder,
                          delete_on_success=True,
                          delete_on_failure=False) as tempdir:
                assert tempdir == expected_dir
                raise ValueError("Fail!")
        except ValueError:
            pass
        assert exists(tempdir) is True
        rmtree(tempdir)

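        # delete_on_success=False, delete_on_failure=True: kept on a clean exit, removed when an exception is raised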
        with temp_dir(tempfolder,
                      delete_on_success=False,
                      delete_on_failure=True) as tempdir:
            assert tempdir == expected_dir
        assert exists(tempdir) is True
        rmtree(tempdir)
        try:
            with temp_dir(tempfolder,
                          delete_on_success=False,
                          delete_on_failure=True) as tempdir:
                assert tempdir == expected_dir
                raise ValueError("Fail!")
        except ValueError:
            pass
        assert exists(tempdir) is False
Example #29
    def test_generate_dataset_and_showcase(self, configuration, downloader):
        with temp_dir("faostat-test") as folder:
            filelist_url = configuration["filelist_url"]
            showcase_base_url = configuration["showcase_base_url"]
            (
                dataset,
                showcase,
                bites_disabled,
                qc_indicators,
            ) = generate_dataset_and_showcase(
                "Food Security",
                TestFaostat.indicatorsets,
                TestFaostat.country,
                TestFaostat.countrymapping,
                showcase_base_url,
                filelist_url,
                downloader,
                folder,
            )
            assert dataset == {
                "name": "faostat-food-security-indicators-for-afghanistan",
                "title": "Afghanistan - Food Security Indicators",
                "notes": "Food Security Indicators for Afghanistan.\n\nContains data from the FAOSTAT [bulk data service](http://lala/datasets_E.json).",
                "maintainer": "196196be-6037-4488-8b71-d786adf4c081",
                "owner_org": "ed727a5b-3e6e-4cd6-b97e-4a71532085e6",
                "data_update_frequency": "365",
                "subnational": "0",
                "tags": [
                    {"name": "hxl", "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87"},
                    {"name": "indicators", "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87"},
                    {"name": "food security", "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87"},
                ],
                "groups": [{"name": "afg"}],
                "dataset_date": "[1999-01-01T00:00:00 TO 2014-12-31T00:00:00]",
            }

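            # Two resources are expected: the full indicator data and a cut-down QuickCharts file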
            resources = dataset.get_resources()
            assert resources == [
                {
                    "name": "Suite of Food Security Indicators for Afghanistan",
                    "description": "*Suite of Food Security Indicators:*\nFor detailed description of the indicators below see attached document: Average Dietary Supply Adequacy;...",
                    "format": "csv",
                    "resource_type": "file.upload",
                    "url_type": "upload",
                },
                {
                    "name": "QuickCharts-Suite of Food Security Indicators for Afghanistan",
                    "description": "Cut down data for QuickCharts",
                    "format": "csv",
                    "resource_type": "file.upload",
                    "url_type": "upload",
                },
            ]
            assert showcase == {
                "name": "faostat-food-security-indicators-for-afghanistan-showcase",
                "title": "Afghanistan - Food Security Indicators",
                "notes": "Food Security Data Dashboard for Afghanistan",
                "url": "http://www.fao.org/faostat/en/#country/AFG",
                "image_url": "https://pbs.twimg.com/profile_images/1375385494167691269/Bc49-Yx8_400x400.jpg",
                "tags": [
                    {"name": "hxl", "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87"},
                    {"name": "indicators", "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87"},
                    {"name": "food security", "vocabulary_id": "4e61d464-4943-4e97-973a-84673c1aaa87"},
                ],
            }
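            # bites_disabled flags which of the configured QuickCharts bites should be disabled for this country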
            assert bites_disabled == [False, True, True]
            assert qc_indicators == [
                {
                    "code": "21010",
                    "title": "Average dietary energy supply adequacy",
                    "unit": "Percentage",
                },
                {
                    "code": "210041",
                    "title": "Prevalence of undernourishment",
                    "unit": "Percentage",
                },
                {
                    "code": "21034",
                    "title":
                    "Percentage of arable land equipped for irrigation",
                    "unit": "Percentage",
                },
            ]
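            # The generated country CSV and its QuickCharts cut should match the stored test fixtures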
            file = "Suite of Food Security Indicators_AFG.csv"
            assert_files_same(join("tests", "fixtures", file),
                              join(folder, file))
            file = f"qc_{file}"
            assert_files_same(join("tests", "fixtures", file),
                              join(folder, file))
Example #30
    def test_generate_dataset_and_showcase(self, configuration):
        with temp_dir('HDRO') as folder:
            qc_indicators = configuration['qc_indicators']
            dataset, showcase, bites_disabled = generate_dataset_and_showcase(
                folder, 'AFG', hdro_data, qc_indicators)
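            # Dataset, showcase and disabled bites generated from the canned hdro_data fixture for AFG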
            assert dataset == {
                'name': 'hdro-data-for-afghanistan',
                'title': 'Afghanistan - Human Development Indicators',
                'maintainer': '872427e4-7e9b-44d6-8c58-30d5052a00a2',
                'owner_org': '89ebe982-abe9-4748-9dde-cf04632757d6',
                'data_update_frequency': '365',
                'subnational': '0',
                'groups': [{'name': 'afg'}],
                'tags': [
                    {'name': 'health', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'education', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'socioeconomic', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'demographics', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'development', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'indicators', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'hxl', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}
                ],
                'dataset_date': '[2008-01-01T00:00:00 TO 2019-12-31T00:00:00]'
            }
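            # As in the FAOSTAT example, two resources are expected: the full data file plus a QuickCharts cut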
            resources = dataset.get_resources()
            assert resources == [{
                'name': 'Human Development Indicators for Afghanistan',
                'description': 'Human development data with HXL tags',
                'format': 'csv',
                'resource_type': 'file.upload',
                'url_type': 'upload'
            }, {
                'name': 'QuickCharts-Human Development Indicators for Afghanistan',
                'description': 'Cut down data for QuickCharts',
                'format': 'csv',
                'resource_type': 'file.upload',
                'url_type': 'upload'
            }]

            assert showcase == {
                'name': 'hdro-data-for-afghanistan-showcase',
                'title': 'Indicators for Afghanistan',
                'notes': 'Human Development indicators for Afghanistan',
                'url': 'http://hdr.undp.org/en/countries/profiles/AFG',
                'image_url': 'https://s1.stabroeknews.com/images/2019/12/undp.jpg',
                'tags': [
                    {'name': 'health', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'education', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'socioeconomic', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'demographics', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'development', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'indicators', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'},
                    {'name': 'hxl', 'vocabulary_id': '4e61d464-4943-4e97-973a-84673c1aaa87'}
                ]
            }
            assert bites_disabled == [False, False, True]
            file = 'hdro_indicators_AFG.csv'
            assert_files_same(join('tests', 'fixtures', file),
                              join(folder, file))
            file = 'qc_%s' % file
            assert_files_same(join('tests', 'fixtures', file),
                              join(folder, file))