def main(excel_path, gsheet_auth, updatesheets, updatetabs, scrapers, basic_auths,
         other_auths, nojson, countries_override, save, use_saved, **ignore):
    logger.info('##### hdx-scraper-covid-viz version %.1f ####' % VERSION)
    configuration = Configuration.read()
    with temp_dir() as temp_folder:
        with Download(rate_limit={'calls': 1, 'period': 0.1}) as downloader:
            retriever = Retrieve(downloader, temp_folder, 'saved_data', temp_folder,
                                 save, use_saved)
            if scrapers:
                logger.info('Updating only scrapers: %s' % scrapers)
            tabs = configuration['tabs']
            if updatetabs is None:
                updatetabs = list(tabs.keys())
                logger.info('Updating all tabs')
            else:
                logger.info('Updating only these tabs: %s' % updatetabs)
            noout = NoOutput(updatetabs)
            if excel_path:
                excelout = ExcelOutput(excel_path, tabs, updatetabs)
            else:
                excelout = noout
            if gsheet_auth:
                gsheets = GoogleSheets(configuration, gsheet_auth, updatesheets,
                                       tabs, updatetabs)
            else:
                gsheets = noout
            if nojson:
                jsonout = noout
            else:
                jsonout = JsonOutput(configuration, updatetabs)
            outputs = {'gsheets': gsheets, 'excel': excelout, 'json': jsonout}
            today = datetime.now()
            countries_to_save = get_indicators(configuration, today, retriever,
                                               outputs, updatetabs, scrapers,
                                               basic_auths, other_auths,
                                               countries_override)
            jsonout.add_additional_json(downloader, today=today)
            jsonout.save(countries_to_save=countries_to_save)
            excelout.save()
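# A minimal sketch of how a main() like the one above is typically invoked in
# these HDX scrapers, assuming hdx-python-api's keyword-arguments facade. The
# argparse flags and the kwarg values below are illustrative, not taken from
# this repository; main()'s **ignore swallows any extra kwargs the facade
# passes through (such as user_agent).
import argparse

from hdx.facade.keyword_arguments import facade


def parse_args():
    parser = argparse.ArgumentParser(description='COVID viz scraper')
    parser.add_argument('-xl', '--excel_path', default=None, help='Path to Excel output')
    parser.add_argument('-gs', '--gsheet_auth', default=None, help='Google Sheets credentials')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # facade() sets up HDX configuration and logging, then calls main() with
    # these keyword arguments.
    facade(main, user_agent='hdx-scraper-covid-viz',
           excel_path=args.excel_path, gsheet_auth=args.gsheet_auth,
           updatesheets=None, updatetabs=None, scrapers=None, basic_auths=dict(),
           other_auths=dict(), nojson=False, countries_override=None,
           save=False, use_saved=False)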
def test_run(self, configuration, fixtures_dir):
    with temp_dir("TestIATIViz", delete_on_success=True, delete_on_failure=False) as tempdir:
        with Download(user_agent="test") as downloader:
            retriever = Retrieve(
                downloader,
                tempdir,
                fixtures_dir,
                tempdir,
                save=False,
                use_saved=True,
            )
            today = "2021-05-06"
            start(
                configuration,
                today,
                retriever,
                tempdir,
                dportal_params=None,
                whattorun="covid",
                filterdate="2020-01",
            )
            for filename in ("flows", "transactions", "reporting_orgs"):
                csv_filename = f"{filename}.csv"
                expected_file = join(fixtures_dir, csv_filename)
                actual_file = join(tempdir, csv_filename)
                assert_files_same(expected_file, actual_file)
                json_filename = f"{filename}.json"
                expected_file = join(fixtures_dir, json_filename)
                actual_file = join(tempdir, json_filename)
                assert filecmp.cmp(expected_file, actual_file)
def main(save, use_saved, **ignore):
    """Generate dataset and create it in HDX"""
    with Download(extra_params_yaml=join(expanduser('~'), '.extraparams.yml'),
                  extra_params_lookup=lookup) as token_downloader:
        configuration = Configuration.read()
        with Download() as downloader:
            folder = temp_dir(lookup)
            retriever = Retrieve(downloader, folder, 'saved_data', folder, save, use_saved)
            wfp = WFPFood(configuration, token_downloader, retriever)
            countries = wfp.get_countries()
            logger.info('Number of country datasets to upload: %d' % len(countries))
            wfp.build_mappings()
            for info, country in progress_storing_tempdir(lookup, countries, 'iso3'):
                dataset, showcase, qc_indicators = wfp.generate_dataset_and_showcase(
                    country['iso3'], info['folder'])
                if dataset:
                    dataset.update_from_yaml()
                    # % is left-associative: the notes template from the YAML is
                    # filled with the sentence first; the %s inside that sentence
                    # is then replaced by the country name.
                    dataset['notes'] = dataset['notes'] % 'Food Prices data for %s. Food prices data comes from the World Food Programme and covers' % country['name']
                    dataset.generate_resource_view(-1, indicators=qc_indicators)
                    dataset.create_in_hdx(
                        remove_additional_resources=True,
                        hxl_update=False,
                        updated_by_script='HDX Scraper: WFP Food Prices',
                        batch=info['batch'])
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
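# A tiny illustration of the left-associative % chain used above for
# dataset['notes']. The template string here is made up for the example; in the
# scraper it comes from the dataset YAML.
template = 'Description: %s more text.'  # stands in for dataset['notes']
sentence = 'Food Prices data for %s. Food prices data comes from the World Food Programme and covers'
result = template % sentence % 'Congo'   # == (template % sentence) % 'Congo'
assert result == ('Description: Food Prices data for Congo. Food prices data comes '
                  'from the World Food Programme and covers more text.')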
def main(
    output_dir,
    saved_dir,
    save,
    use_saved,
    dportal_params,
    whattorun,
    filterdate,
    **ignore,
):
    logger.info(f"##### hdx-scraper-iati-viz version {VERSION:.1f} ####")
    configuration = Configuration.read()
    output_dir = f"{output_dir}_{whattorun}"
    rmtree(output_dir, ignore_errors=True)
    mkdir(output_dir)
    with Download() as downloader:
        retriever = Retrieve(
            downloader,
            configuration["fallback_dir"],
            f"{saved_dir}_{whattorun}",
            output_dir,
            save,
            use_saved,
        )
        today = datetime.utcnow().isoformat()
        start(
            configuration,
            today,
            retriever,
            output_dir,
            dportal_params,
            whattorun,
            filterdate,
        )
def test_get_indicators(self, configuration, folder):
    with temp_dir('TestCovidViz', delete_on_success=True, delete_on_failure=False) as tempdir:
        with Download(user_agent='test') as downloader:
            retriever = Retrieve(downloader, tempdir, folder, tempdir,
                                 save=False, use_saved=True)
            tabs = configuration['tabs']
            noout = NoOutput(tabs)
            jsonout = JsonOutput(configuration, tabs)
            outputs = {'gsheets': noout, 'excel': noout, 'json': jsonout}
            today = parse_date('2021-05-03')
            countries_to_save = get_indicators(
                configuration, today, retriever, outputs, tabs,
                scrapers=[
                    'ifi', 'who_global', 'who_national', 'who_subnational',
                    'who_covid', 'sadd', 'covidtests', 'cadre_harmonise',
                    'access', 'food_prices'
                ],
                use_live=False)
            filepaths = jsonout.save(tempdir, countries_to_save=countries_to_save)
            assert filecmp.cmp(filepaths[0], join(folder, 'test_scraper_all.json'))
            assert filecmp.cmp(filepaths[1], join(folder, 'test_scraper.json'))
            assert filecmp.cmp(filepaths[2], join(folder, 'test_scraper_daily.json'))
            assert filecmp.cmp(filepaths[3], join(folder, 'test_scraper_covidseries.json'))
def test_retrieve_file(self, tmpdir, retrieverfolder, fallback_dir):
    tmpdir = str(tmpdir)
    saved_dir = join(tmpdir, "saved")
    temp_dir = join(tmpdir, "temp")
    rmtree(temp_dir, ignore_errors=True)
    mkdir(temp_dir)
    with Download() as downloader:
        with pytest.raises(ValueError):
            Retrieve(
                downloader,
                fallback_dir,
                saved_dir,
                temp_dir,
                save=True,
                use_saved=True,
            )
        retriever = Retrieve(
            downloader,
            fallback_dir,
            saved_dir,
            temp_dir,
            save=False,
            use_saved=False,
        )
        filename = "test.txt"
        url = join(retrieverfolder, filename)
        path = retriever.retrieve_file(url, filename, logstr="test file", fallback=True)
        assert path == join(temp_dir, filename)
        path = retriever.retrieve_file("NOTEXIST", filename, logstr="test file", fallback=True)
        assert path == join(fallback_dir, filename)
        with pytest.raises(DownloadError):
            retriever.retrieve_file("NOTEXIST", filename, fallback=False)
        with pytest.raises(DownloadError):
            long_url = "".join(
                random.SystemRandom().choice(string.ascii_uppercase + string.digits)
                for _ in range(150))
            retriever.retrieve_file(long_url, filename, fallback=False)
        text = retriever.retrieve_text(url, filename, logstr="test file", fallback=False)
        assert text == "hello"
        text = retriever.retrieve_text("NOTEXIST", filename, logstr="test file", fallback=True)
        assert text == "goodbye"
        with pytest.raises(DownloadError):
            retriever.retrieve_text("NOTEXIST", filename, fallback=False)
        filename = "test.yaml"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_yaml(url, filename, logstr="test file", fallback=False)
        assert data["param_1"] == "ABC"
        data = retriever.retrieve_yaml("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["param_1"] == "XYZ"
        with pytest.raises(DownloadError):
            retriever.retrieve_yaml("NOTEXIST", filename, fallback=False)
        filename = "test.json"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_json(url, filename, logstr="test file", fallback=False)
        assert data["my_param"] == "abc"
        data = retriever.retrieve_json("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["my_param"] == "xyz"
        with pytest.raises(DownloadError):
            retriever.retrieve_json("NOTEXIST", filename, fallback=False)
        retriever = Retrieve(
            downloader,
            fallback_dir,
            saved_dir,
            temp_dir,
            save=True,
            use_saved=False,
        )
        filename = "test.txt"
        url = join(retrieverfolder, filename)
        path = retriever.retrieve_file(url, filename, logstr="test file", fallback=True)
        assert path == join(saved_dir, filename)
        path = retriever.retrieve_file("NOTEXIST", filename, logstr="test file", fallback=True)
        assert path == join(fallback_dir, filename)
        with pytest.raises(DownloadError):
            retriever.retrieve_file("NOTEXIST", filename, fallback=False)
        text = retriever.retrieve_text(url, filename, logstr="test file", fallback=False)
        assert text == "hello"
        text = retriever.retrieve_text("NOTEXIST", filename, logstr="test file", fallback=True)
        assert text == "goodbye"
        with pytest.raises(DownloadError):
            retriever.retrieve_text("NOTEXIST", filename, fallback=False)
        filename = "test.yaml"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_yaml(url, filename, logstr="test file", fallback=False)
        assert data["param_1"] == "ABC"
        data = retriever.retrieve_yaml("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["param_1"] == "XYZ"
        with pytest.raises(DownloadError):
            retriever.retrieve_yaml("NOTEXIST", filename, fallback=False)
        filename = "test.json"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_json(url, filename, logstr="test file", fallback=False)
        assert data["my_param"] == "abc"
        data = retriever.retrieve_json("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["my_param"] == "xyz"
        with pytest.raises(DownloadError):
            retriever.retrieve_json("NOTEXIST", filename, fallback=False)
        retriever = Retrieve(
            downloader,
            fallback_dir,
            saved_dir,
            temp_dir,
            save=False,
            use_saved=True,
        )
        filename = "test.txt"
        url = join(retrieverfolder, filename)
        path = retriever.retrieve_file(url, filename, logstr="test file", fallback=True)
        assert path == join(saved_dir, filename)
        path = retriever.retrieve_file("NOTEXIST", filename, logstr="test file", fallback=True)
        assert path == join(saved_dir, filename)
        path = retriever.retrieve_file("NOTEXIST", filename, fallback=False)
        assert path == join(saved_dir, filename)
        text = retriever.retrieve_text(url, filename, logstr="test file", fallback=False)
        assert text == "hello"
        text = retriever.retrieve_text("NOTEXIST", filename, logstr="test file", fallback=True)
        assert text == "hello"
        text = retriever.retrieve_text("NOTEXIST", filename, fallback=False)
        assert text == "hello"
        filename = "test.yaml"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_yaml(url, filename, logstr="test file", fallback=False)
        assert data["param_1"] == "ABC"
        data = retriever.retrieve_yaml("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["param_1"] == "ABC"
        data = retriever.retrieve_yaml("NOTEXIST", filename, fallback=False)
        assert data["param_1"] == "ABC"
        filename = "test.json"
        url = join(retrieverfolder, filename)
        data = retriever.retrieve_json(url, filename, logstr="test file", fallback=False)
        assert data["my_param"] == "abc"
        data = retriever.retrieve_json("NOTEXIST", filename, logstr="test file", fallback=True)
        assert data["my_param"] == "abc"
        data = retriever.retrieve_json("NOTEXIST", filename, fallback=False)
        assert data["my_param"] == "abc"
def test_run(self, configuration, fixtures_dir, input_dir):
    with temp_dir('TestWFPFood', delete_on_success=True, delete_on_failure=False) as tempdir:
        with Download(user_agent='test') as downloader:
            retriever = Retrieve(downloader, tempdir, input_dir, tempdir,
                                 save=False, use_saved=True)
            wfp = WFPFood(configuration, None, retriever)
            countries = wfp.get_countries()
            assert len(countries) == 291
            assert countries[100:102] == [
                {'iso3': 'GTM', 'name': 'Guatemala'},
                {'iso3': 'GUF', 'name': 'French Guiana'}]
            wfp.build_mappings()
            dataset, showcase, qc_indicators = wfp.generate_dataset_and_showcase(
                'COG', tempdir)
            assert dataset == {
                'name': 'wfp-food-prices-for-congo',
                'title': 'Congo - Food Prices',
                'maintainer': 'f1921552-8c3e-47e9-9804-579b14a83ee3',
                'owner_org': '3ecac442-7fed-448d-8f78-b385ef6f84e7',
                'data_update_frequency': '7',
                'groups': [{'name': 'cog'}],
                'subnational': '1',
                'tags': [
                    {'name': 'commodities', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'prices', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'markets', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'hxl', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'}],
                'dataset_source': 'CARITAS, GOV, Gvt, National Institute Of Statistics (INS), WFP',
                'dataset_date': '[2011-01-15T00:00:00 TO 2020-07-15T00:00:00]'}
            assert showcase == {
                'name': 'wfp-food-prices-for-congo-showcase',
                'title': 'Congo - Food Prices showcase',
                'notes': 'Congo food prices data from World Food Programme displayed through VAM Economic Explorer',
                'url': 'http://dataviz.vam.wfp.org/economic_explorer/prices?iso3=COG',
                'image_url': 'http://dataviz.vam.wfp.org/_images/home/3_economic.jpg',
                'tags': [
                    {'name': 'commodities', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'prices', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'markets', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'},
                    {'name': 'hxl', 'vocabulary_id': 'b891512e-9516-4bf5-962a-7a289772a2a1'}]}
            assert qc_indicators == [
                {'code': 'Brazzaville-Brazzaville-Total-Groundnuts (shelled)-KG-XAF',
                 'title': 'Price of Groundnuts (shelled) in Total',
                 'unit': 'Currency XAF',
                 'description': 'Price of Groundnuts (shelled) (XAF/KG) in Brazzaville/Total',
                 'code_col': '#meta+code',
                 'value_col': '#value',
                 'date_col': '#date'},
                {'code': 'Point-Noire-Pointe-Noire-Grand marché/Fond Ntié-Ntié/Nkouikou-Oil (vegetable)-L-XAF',
                 'title': 'Price of Oil (vegetable) in Grand marché/Fond Ntié-Ntié/Nkouikou',
                 'unit': 'Currency XAF',
                 'description': 'Price of Oil (vegetable) (XAF/L) in Point-Noire/Pointe-Noire/Grand marché/Fond Ntié-Ntié/Nkouikou',
                 'code_col': '#meta+code',
                 'value_col': '#value',
                 'date_col': '#date'},
                {'code': 'Pool-Kinkala-Kinkala-Rice (mixed, low quality)-KG-XAF',
                 'title': 'Price of Rice (mixed, low quality) in Kinkala',
                 'unit': 'Currency XAF',
                 'description': 'Price of Rice (mixed, low quality) (XAF/KG) in Pool/Kinkala',
                 'code_col': '#meta+code',
                 'value_col': '#value',
                 'date_col': '#date'}]
            for filename in ('wfp_food_prices_cog', 'wfp_food_prices_cog_qc'):
                csv_filename = f'{filename}.csv'
                expected_file = join(fixtures_dir, csv_filename)
                actual_file = join(tempdir, csv_filename)
                assert_files_same(expected_file, actual_file)