def update_config(self, config_):
    """CKAN plugin hook: register this extension's assets and apply the
    knowledgehub patches, then (re)create the global HDX configuration.

    :param config_: the CKAN config object passed in by the plugin toolkit
    """
    toolkit.add_template_directory(config_, 'templates')
    toolkit.add_public_directory(config_, 'public')
    toolkit.add_resource('fanstatic', 'knowledgehub')
    # Eliminates the need to re-initialize the database when model changes.
    # _init_knowledgehub_database()
    _patch_ckan_base_controller()
    # patch the CKAN core functionality
    patch_ckan_core_search()
    # Extend CKAN Tag table
    # extend_tag_table()
    # Extend CKAN ResourceView table
    # extend_resource_view_table()
    # Upgrade the dashboard table.
    # dashboard_table_upgrade()
    DatastoreBackend.register_backends()
    # DatastoreBackend.set_active_backend(config)
    # Create the HDX configuration.
    # NOTE(review): these reads use the module-level `config`, not the
    # `config_` argument — presumably both refer to the same CKAN config;
    # confirm this is intentional.
    hdx_api_key = config.get(u'ckanext.knowledgehub.hdx.api_key')
    hdx_site = config.get(u'ckanext.knowledgehub.hdx.site', 'test')
    # Drop any existing HDX configuration before creating a fresh one so
    # Configuration.create does not raise on reload.
    Configuration.delete()
    Configuration.create(hdx_site=hdx_site,  # from config, default to test
                         user_agent='admin',
                         hdx_key=hdx_api_key)
def test_validlocations(self, project_config_yaml):
    """Exercise Locations lookups against explicit, registered and
    configuration-derived valid-location lists."""
    Country.countriesdata(use_live=False)
    # Lookups against a locations list passed explicitly as an argument
    validlocations = [{'name': 'shn', 'title': 'St. Helena'}]
    assert Locations.get_HDX_code_from_location('sh', locations=validlocations) is None
    assert Locations.get_HDX_code_from_location_partial('sh', locations=validlocations) == (None, False)
    assert Locations.get_location_from_HDX_code('shn', locations=validlocations) == 'St. Helena'
    # Lookups against the globally registered list
    validlocations = [{'name': 'zmb', 'title': 'Zambia'}, {'name': 'pry', 'title': 'Paraguay'}]
    Locations.set_validlocations(validlocations)
    assert Locations.validlocations() == validlocations
    assert Locations.get_HDX_code_from_location_partial('NOT') == (None, False)
    assert Locations.get_location_from_HDX_code('pr') is None
    assert Locations.get_HDX_code_from_location('zmb') == 'ZMB'
    assert Locations.get_HDX_code_from_location_partial('zmb') == ('ZMB', True)
    # Exact lookup of a prefix fails; partial lookup reports a non-exact match
    assert Locations.get_HDX_code_from_location('Z') is None
    assert Locations.get_HDX_code_from_location_partial('Z') == ('ZMB', False)
    assert Locations.get_HDX_code_from_location_partial('Zambia') == ('ZMB', True)
    assert Locations.get_HDX_code_from_location_partial('ZAM') == ('ZMB', False)
    assert Locations.get_location_from_HDX_code('zmb', locations=validlocations) == 'Zambia'
    # An explicit locations argument overrides the registered list
    validlocations = [{'name': 'shn', 'title': 'St. Helena'}]
    assert Locations.get_HDX_code_from_location('sh', locations=validlocations) is None
    assert Locations.get_HDX_code_from_location_partial('sh', locations=validlocations) == (None, False)
    assert Locations.get_location_from_HDX_code('shn', locations=validlocations) == 'St. Helena'
    # With no registered list, locations come from the configuration
    Configuration.setup(MyConfiguration())
    Locations.set_validlocations(None)
    assert Locations.get_HDX_code_from_location('zaf') == 'ZAF'
    assert Locations.get_HDX_code_from_location_partial('zaf') == ('ZAF', True)
    assert Locations.get_location_from_HDX_code('zaf') == 'South Africa'
def get_dataset_from_hdx(hdx_address: str, dataset_name: str, output_filename: str) -> str:
    """
    Use the HDX API to download a dataset resource based on the address and resource name.

    :param hdx_address: The HDX address (slug) of the dataset
    :param dataset_name: The name of the resource within the dataset
    :param output_filename: The desired full filepath of the downloaded file
    :return: ``output_filename``, the path the file was saved to
    :raises HDXDatasetNotFound: if no resource named ``dataset_name`` exists
        in the dataset at ``hdx_address``
    """
    HDX_SITE = 'prod'
    USER_AGENT = 'MapAction'
    Configuration.create(hdx_site=HDX_SITE, user_agent=USER_AGENT, hdx_read_only=True)
    logger = logging.getLogger(__name__)
    # TODO: make more generic caching ability
    # file_age_days = utils.get_file_age_days(save_filepath)
    # if 0 < file_age_days < cache_days:
    #     return save_filepath
    logger.info(f'Querying HDX API for dataset {hdx_address}')
    resources = Dataset.read_from_hdx(hdx_address).get_resources()
    for resource in resources:
        if resource['name'] == dataset_name:
            # resource.download() returns (url, local_path); only the path is needed
            _, download_filepath = resource.download()
            copy_file(source_path=download_filepath, target_path=output_filename)
            save_file(output_filename)
            logger.info(f'Saved to {output_filename}')
            return output_filename
    raise HDXDatasetNotFound(
        f'HDX dataset with address "{hdx_address}" and name "{dataset_name}" not found'
    )
def configuration(self):
    """Fixture: read-only HDX test configuration with Afghanistan as the
    only valid location and a stubbed approved tag vocabulary."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True, user_agent='test',
                          project_config_yaml=project_config)
    # add locations used in tests
    Locations.set_validlocations([{'name': 'afg', 'title': 'Afghanistan'}])
    Country.countriesdata(use_live=False)
    # Pretend the tag mappings are already loaded and stub the vocabulary
    Vocabulary._tags_dict = True
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in ('hxl', 'commodities', 'prices', 'markets')],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
def configuration(self):
    """Fixture: read-only HDX test configuration with Argentina as the
    only valid location and a stubbed approved tag vocabulary."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True, user_agent='test',
                          project_config_yaml=project_config)
    # add locations used in tests
    Locations.set_validlocations([{'name': 'arg', 'title': 'Argentina'}])
    Country.countriesdata(use_live=False)
    # Pretend the tag mappings are already loaded and stub the vocabulary
    Vocabulary._tags_dict = True
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in ('sustainable development', 'demographics',
                                           'socioeconomics', 'education')],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
def configuration(self):
    """Fixture: keyed HDX test configuration with Bangladesh as the only
    valid location and a stubbed approved tag vocabulary."""
    project_config = join("tests", "config", "project_configuration.yml")
    Configuration._create(user_agent="test", hdx_key="12345",
                          project_config_yaml=project_config)
    Locations.set_validlocations([{"name": "bgd", "title": "Bangladesh"}])
    Country.countriesdata(use_live=False)
    # Pretend the tag mappings are already loaded and stub the vocabulary
    Vocabulary._tags_dict = True
    Vocabulary._approved_vocabulary = {
        "tags": [{"name": tag} for tag in ("hxl", "violence and conflict",
                                           "protests", "security incidents")],
        "id": "4e61d464-4943-4e97-973a-84673c1aaa87",
        "name": "approved",
    }
def configuration(self):
    """Fixture: keyed HDX test configuration with Afghanistan and Cameroon
    as valid locations and a stubbed approved tag vocabulary."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(user_agent='test', hdx_key='12345',
                          project_config_yaml=project_config)
    Locations.set_validlocations([{'name': 'afg', 'title': 'Afghanistan'},
                                  {'name': 'cmr', 'title': 'Cameroon'}])
    Country.countriesdata(use_live=False)
    # Pretend the tag mappings are already loaded and stub the vocabulary
    Vocabulary._tags_dict = True
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in ('hxl', 'violence and conflict',
                                           'protests', 'security incidents')],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
def configuration(self):
    """Fixture: read-only HDX test configuration with Afghanistan, Tanzania
    and World as valid locations and a stubbed approved tag vocabulary."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True, user_agent='test',
                          project_config_yaml=project_config)
    Locations.set_validlocations([{'name': 'afg', 'title': 'Afghanistan'},
                                  {'name': 'tza', 'title': 'Tanzania'},
                                  {'name': 'world', 'title': 'World'}])
    Country.countriesdata(use_live=False)
    # Pretend the tag mappings are already loaded and stub the vocabulary
    Vocabulary._tags_dict = True
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in ('hxl', 'violence and conflict', 'displacement',
                                           'internally displaced persons - idp')],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
def test_hdx_configuration_yaml(self, hdx_key_file, hdx_config_yaml, project_config_yaml):
    """A configuration built from key file + hdx/project yamls equals the
    expected merged dictionary."""
    Configuration._create(hdx_key_file=hdx_key_file, hdx_config_yaml=hdx_config_yaml,
                          project_config_yaml=project_config_yaml)
    # Expected merge of the three sources (key file supplies api_key,
    # project yaml supplies param_1, hdx yaml supplies the rest)
    expected_configuration = {
        'api_key': '12345',
        'param_1': 'ABC',
        'hdx_prod_site': {
            'url': 'https://data.humdata.org/',
            'username': None,
            'password': None
        },
        'hdx_test_site': {
            'url': 'https://test-data.humdata.org/',
            'username': '******',
            'password': '******'
        },
        'dataset': {
            'required_fields': [
                'name',
                'title',
                'dataset_date',
            ]
        },
        'resource': {
            'required_fields': ['package_id', 'name', 'description']
        },
        'showcase': {
            'required_fields': ['name', 'title']
        },
    }
    assert Configuration.read() == expected_configuration
def main():
    """Generate dataset and create it in HDX"""
    # Read all project configuration values up front
    configuration = Configuration.read()
    filelist_url = configuration['filelist_url']
    country_group_url = configuration['country_group_url']
    dataset_codes = configuration['dataset_codes']
    showcase_base_url = configuration['showcase_base_url']
    with temp_dir('faostat') as folder:
        with Download() as downloader:
            indicatortypes = get_indicatortypesdata(filelist_url, downloader)
            countriesdata = get_countriesdata(country_group_url, downloader)
            logger.info('Number of indicator types to upload: %d' % len(dataset_codes))
            for dataset_code in dataset_codes:
                datasets, showcases = generate_datasets_and_showcases(
                    downloader, folder, dataset_codes[dataset_code],
                    indicatortypes[dataset_code], countriesdata, showcase_base_url)
                logger.info('Number of datasets to upload: %d' % len(datasets))
                # Datasets and showcases are parallel lists; upload them pairwise
                for dataset, showcase in zip(datasets, showcases):
                    logger.info('Creating dataset: %s' % dataset['title'])
                    dataset.preview_off()
                    dataset.create_in_hdx()
                    showcase.create_in_hdx()
                    showcase.add_dataset(dataset)
def configuration(self):
    """Fixture: read-only HDX test configuration with Guinea as the only
    valid location and a stubbed approved tag vocabulary."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True, user_agent='test',
                          project_config_yaml=project_config)
    # add locations used in tests
    Locations.set_validlocations([{'name': 'gin', 'title': 'Guinea'}])
    Country.countriesdata(use_live=False)
    # Pretend the tag mappings are already loaded and stub the vocabulary
    Vocabulary._tags_dict = True
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in ('hxl', 'food security', 'indicators')],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
def test_hdx_configuration_yaml(self, hdx_config_yaml, hdx_base_config_yaml, project_config_yaml):
    """A configuration built from hdx/base/project yamls equals the expected
    merged dictionary."""
    Configuration._create(user_agent='test', hdx_config_yaml=hdx_config_yaml,
                          hdx_base_config_yaml=hdx_base_config_yaml,
                          project_config_yaml=project_config_yaml)
    # Expected merge of the three yaml sources
    expected_configuration = {
        'hdx_site': 'prod',
        'hdx_read_only': False,
        'hdx_key': '12345',
        'hdx_prod_site': {
            'url': 'https://data.humdata.org',
        },
        'hdx_test_site': {
            'url': 'https://test-data.humdata.org',
            'username': '******',
            'password': '******'
        },
        'dataset': {'required_fields': [
            'name',
            'title',
            'dataset_date',
        ]},
        'resource': {'required_fields': ['package_id', 'name', 'description']},
        'showcase': {'required_fields': ['name', 'title']},
        'approved_tags_vocabulary': 'Topics',
        'tags_list_url': 'https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/master/tests/fixtures/Accepted_Tags.csv',
        'tags_mapping_url': 'https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/master/tests/fixtures/Tag_Mapping.csv',
    }
    assert Configuration.read() == expected_configuration
def configuration(self):
    """Fixture: read-only HDX test configuration with Afghanistan as the
    only valid location; returns the created configuration."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True, user_agent='test',
                          project_config_yaml=project_config)
    Locations.set_validlocations([{'name': 'afg', 'title': 'Afghanistan'}])
    Country.countriesdata(use_live=False)
    # Pretend the tag mappings are already loaded and stub the vocabulary
    Vocabulary._tags_dict = True
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in ('hxl', 'indicators', 'health', 'education',
                                           'socioeconomic', 'demographics', 'development')],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
    return Configuration.read()
def test_get_hdx_key_site(self, hdx_config_yaml, project_config_yaml):
    """API key, site url, credentials and dataset url for a yaml-keyed,
    empty-base-config configuration."""
    Configuration._create(user_agent='test',
                          hdx_config_yaml=hdx_config_yaml,
                          hdx_base_config_dict={},
                          project_config_yaml=project_config_yaml)
    cfg = Configuration.read()
    assert cfg.get_api_key() == '12345'
    assert cfg.get_hdx_site_url() == 'https://data.humdata.org'
    # No site credentials are configured
    assert cfg._get_credentials() is None
    assert cfg.get_dataset_url('mydataset') == 'https://data.humdata.org/dataset/mydataset'
def configuration(self):
    """Fixture: keyed HDX configuration on the 'feature' site with
    Afghanistan and Cameroon as valid locations; returns the configuration."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_site='feature', user_agent='test', hdx_key='12345',
                          project_config_yaml=project_config)
    Locations.set_validlocations([{'name': 'afg', 'title': 'Afghanistan'},
                                  {'name': 'cmr', 'title': 'Cameroon'}])
    Country.countriesdata(use_live=False)
    # Pretend the tag mappings are already loaded and stub the vocabulary
    Vocabulary._tags_dict = True
    Vocabulary._approved_vocabulary = {
        'tags': [{'name': tag} for tag in ('hxl', 'health', 'demographics')],
        'id': '4e61d464-4943-4e97-973a-84673c1aaa87',
        'name': 'approved',
    }
    return Configuration.read()
def configuration(self):
    """Fixture: read-only prod-site HDX configuration with Congo as the
    only valid location; returns the configuration."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True, hdx_site='prod', user_agent='test',
                          project_config_yaml=project_config)
    Locations.set_validlocations([{'name': 'cog', 'title': 'Congo'}])
    return Configuration.read()
def get_url(self):
    """Look up the movement-range-maps dataset on HDX and store the download
    url of its second resource on this object; returns self for chaining."""
    Configuration.create(hdx_site='prod', user_agent='A_Quick_Example', hdx_read_only=True)
    resource = Dataset.read_from_hdx('movement-range-maps').get_resources()[1]
    self.url = resource['download_url']
    return self
def configuration(self):
    """Fixture: read-only HDX test configuration with Afghanistan as the
    only valid location."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True, user_agent='test',
                          project_config_yaml=project_config)
    # add locations used in tests
    Locations.set_validlocations([{'name': 'afg', 'title': 'Afghanistan'}])
def test_get_hdx_key_site(self, hdx_key_file, project_config_yaml):
    """API key, site url and credentials for a key-file-backed prod
    configuration with an empty hdx config dict."""
    Configuration._create(hdx_site='prod',
                          hdx_key_file=hdx_key_file,
                          hdx_config_dict={},
                          project_config_yaml=project_config_yaml)
    cfg = Configuration.read()
    assert cfg.get_api_key() == '12345'
    assert cfg.get_hdx_site_url() == 'https://data.humdata.org/'
    # Empty (but present) credentials for the prod site
    assert cfg._get_credentials() == ('', '')
def pop_data_download(region_names, wp_year=2017):
    """Download HRSL population zips from HDX and WorldPop rasters for the
    countries named by the first three letters of each region name.

    :param region_names: iterable of region identifiers whose first three
        characters are ISO3 country codes
    :param wp_year: WorldPop year(s) to fetch — an int or a list of ints
    """
    from hdx.utilities.easy_logging import setup_logging
    setup_logging()
    from hdx.hdx_configuration import Configuration
    Configuration.create(hdx_site='prod', user_agent='Read-only user', hdx_read_only=True)
    from hdx.data.dataset import Dataset
    import wpgpDownload
    from wpgpDownload.utils.convenience_functions import download_country_covariates as download_worldpop
    from wpgpDownload.utils.convenience_functions import refresh_csv
    # Refresh the local WorldPop product catalogue before downloading
    refresh_csv()
    hdx_datasets = Dataset.search_in_hdx('hrsl', rows=500)
    hdx_resources = Dataset.get_all_resources(hdx_datasets)
    print('')
    # Dedupe: several regions may map to the same ISO3 country code
    country_names = set([region[0:3] for region in region_names])
    for country in country_names:
        print(country)
        for res in hdx_resources:
            # Only the zipped raster resources, not the csv variants
            if 'population_'+country.lower() in res['name'] and '.zip' in res['name'] and 'csv' not in res['name']:
                print('Downloading HRSL', res['name'], end='\r')
                url, path = res.download()
                print('HRSL', res['name'], 'download completed ')
                shutil.move(Path(path), Path('./'+country+'/misc_data/population_'+country.lower()+'.zip'))
                zipfile.ZipFile(Path('./'+country+'/misc_data/population_'+country.lower()+'.zip'), 'r').extractall(Path('./'+country+'/misc_data'))
                # Keep only the extracted .tif rasters
                for file in Path('./'+country+'/misc_data').iterdir():
                    if 'population_'+country.lower() in file.name and file.suffix != '.tif':
                        os.remove(file)
        # Normalize wp_year to a list of years
        if type(wp_year) == list:
            years = wp_year
        elif type(wp_year) == int:
            years = [wp_year]
        #NTL_files = [file for file in Path("./"+country+"/NTL").iterdir() if "NTL" in file.name]
        #
        #years = []
        #for NTL_file in NTL_files:
        #    years.append(NTL_file.name[4:8])
        #years = [year for year in set(years)]
        #years.sort()
        for year in years:
            print('Downloading WorldPop '+country+' '+str(year)+'\t\t', end='\r')
            download_worldpop(ISO=country, out_folder='.\\'+country+'\\worldpop', prod_name='ppp_'+str(year))
            print('WorldPop '+country+' '+str(year)+' download completed\t\t')
    print("")
    print('Done')
def test_set_hdx_key_value(self, empty_hdx_key_file, project_config_yaml):
    """API key get/set behaviour, including the read-only mode which hides
    the key until read-only is switched off."""
    # An empty key file cannot be loaded
    with pytest.raises(LoadError):
        Configuration.load_api_key(empty_hdx_key_file)
    Configuration._create(hdx_site='prod', hdx_key='TEST_HDX_KEY',
                          hdx_config_dict={},
                          project_config_yaml=project_config_yaml)
    configuration = Configuration.read()
    assert configuration.get_api_key() == 'TEST_HDX_KEY'
    configuration.set_api_key('NEW API KEY')
    assert configuration.get_api_key() == 'NEW API KEY'
    # Recreate in read-only mode: no key is exposed
    Configuration._create(hdx_site='prod', hdx_read_only=True,
                          hdx_config_dict={},
                          project_config_yaml=project_config_yaml)
    assert Configuration.read().get_api_key() is None
    configuration = Configuration.read()
    # A key set while read-only is retained but hidden until read-only is off
    configuration.set_api_key('TEST API KEY')
    assert configuration.get_api_key() is None
    configuration.set_read_only(False)
    assert configuration.get_api_key() == 'TEST API KEY'
    configuration.set_read_only(True)
    assert configuration.get_api_key() is None
    configuration.set_api_key('NEW API KEY')
    configuration.set_read_only(False)
    assert configuration.get_api_key() == 'NEW API KEY'
def main():
    """Generate dataset and create it in HDX"""
    with Download() as downloader:
        indicators = Configuration.read()['indicators']
        tags = Configuration.read()['tags']
        folder = get_temp_dir('IDMC')
        datasets, showcase, headersdata, countriesdata = generate_indicator_datasets_and_showcase(
            downloader, folder, indicators, tags)
        showcase_not_added = True
        countries = [{'iso3': x} for x in sorted(countriesdata)]
        logger.info('Number of indicator datasets to upload: %d' % len(indicators))
        logger.info('Number of country datasets to upload: %d' % len(countries))
        # Iterate indicator datasets (i == 0) then country datasets (i == 1),
        # resuming from any previously stored progress
        for i, info, nextdict in multiple_progress_storing_tempdir(
                'IDMC', [indicators, countries], ['name', 'iso3']):
            folder = info['folder']
            batch = info['batch']
            if i == 0:
                # Create the shared showcase exactly once
                if showcase_not_added:
                    showcase.create_in_hdx()
                    showcase_not_added = False
                dataset = datasets[nextdict['name']]
                dataset.update_from_yaml()
                dataset.generate_resource_view(
                    join('config', nextdict['resourceview']))
                dataset.create_in_hdx(remove_additional_resources=True,
                                      hxl_update=False,
                                      updated_by_script='HDX Scraper: IDMC',
                                      batch=batch)
                showcase.add_dataset(dataset)
            else:
                countryiso = nextdict['iso3']
                countrydata = countriesdata[countryiso]
                dataset, showcase, bites_disabled = \
                    generate_country_dataset_and_showcase(downloader, folder, headersdata,
                                                          countryiso, countrydata, datasets, tags)
                if dataset:
                    dataset.update_from_yaml()
                    dataset.generate_resource_view(
                        bites_disabled=bites_disabled)
                    dataset.create_in_hdx(
                        remove_additional_resources=True,
                        hxl_update=False,
                        updated_by_script='HDX Scraper: IDMC',
                        batch=batch)
                    # Reorder resources longest-name first
                    resources = dataset.get_resources()
                    resource_ids = [
                        x['id'] for x in sorted(resources,
                                                key=lambda x: len(x['name']),
                                                reverse=True)
                    ]
                    dataset.reorder_resources(resource_ids, hxl_update=False)
def my_testfnkw(**kwargs):
    """Record a Configuration value on testresult according to kwargs['fn'];
    'exc' records the site url and then raises ValueError."""
    getters = {
        'site': lambda: Configuration.read().get_hdx_site_url(),
        'api': lambda: Configuration.read().get_api_key(),
        'agent': lambda: Configuration.read().user_agent,
        'exc': lambda: Configuration.read().get_hdx_site_url(),
    }
    fn = kwargs.get('fn')
    if fn in getters:
        testresult.actual_result = getters[fn]()
    if fn == 'exc':
        raise ValueError('Some failure!')
def configuration(self):
    """Fixture: read-only HDX test configuration with Argentina as the
    only valid location and offline country data."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True, user_agent='test',
                          project_config_yaml=project_config)
    # add locations used in tests
    Locations.set_validlocations([{'name': 'arg', 'title': 'Argentina'}])
    Country.countriesdata(use_live=False)
def process_mobility(self):
    """Download Facebook movement-range data from HDX, filter it to France
    via shell tools, and merge it into the enriched covid history csv.

    Side effects: writes/overwrites files under the hard-coded covsco data
    directories, changes the working directory back to the scripts folder,
    and rewrites Enriched_Covid_history_data.csv in place.
    """
    print("Processing Mobility indices data ...")
    Configuration.create(hdx_site='prod', user_agent='A_Quick_Example', hdx_read_only=True)
    dataset = Dataset.read_from_hdx('movement-range-maps')
    resources = dataset.get_resources()
    # The second resource of the dataset is the movement-range zip
    dic = resources[1]
    url_mobility = dic['download_url']
    self.file_mobility = "/home/ludo915/code/covsco/data/train/mobility/fr/mvt_range.zip"
    download_url(url_mobility, self.file_mobility)
    with ZipFile(self.file_mobility, 'r',) as zipf:
        zipf.printdir()
        print('Extracting mv_range file now...')
        # The data file is the last entry in the archive
        mvt_range = zipf.namelist()[-1]
        zipf.extract(mvt_range, "/home/ludo915/code/covsco/data/train/mobility/fr/")
        print('Done!')
    # Filter the world-wide file down to France with shell tools, keeping
    # the header row, then return to the scripts directory
    os.chdir("/home/ludo915/code/covsco/data/train/mobility/fr/")
    os.system("""grep "FRA" """ + mvt_range + """ > mouvement-range-FRA.txt""")
    os.system("""head -n 1 """ + mvt_range + """ > header.txt""")
    os.system("""cat header.txt mouvement-range-FRA.txt > mouvement-range-FRA-final.csv""")
    os.chdir("/home/ludo915/code/covsco/scripts")
    self.df = pd.read_csv("/home/ludo915/code/covsco/data/train/mobility/fr/mouvement-range-FRA-final.csv", sep='\t')
    print(self.df)
    self.df["ds"] = pd.to_datetime(self.df["ds"], dayfirst=True)
    # Repair region names whose accented characters were mangled upstream
    self.df['polygon_name'] = self.df['polygon_name'].replace(
        {'Ile-de-France': 'Île-de-France',
         '-le-de-France': 'Île-de-France',
         "Auvergne-Rh-ne-Alpes": "Auvergne-Rhône-Alpes",
         "Bourgogne-Franche-Comt-": "Bourgogne-Franche-Comté",
         "Provence-Alpes-C-te d'Azur": "Provence-Alpes-Côte d'Azur"})
    self.df2 = pd.read_csv('/home/ludo915/code/covsco/data/train/all_data_merged/fr/Enriched_Covid_history_data.csv')
    self.df2["date"] = pd.to_datetime(self.df2["date"])
    self.df3 = pd.read_csv("/home/ludo915/code/covsco/data/train/pop/fr/regions_departements.csv", sep=";")
    self.df.reset_index(inplace=True)
    self.df2.reset_index(inplace=True)
    self.df3.reset_index(inplace=True)
    self.df.drop(columns=["index"], inplace=True)
    self.df2.drop(columns=["index"], inplace=True)
    self.df3.drop(columns=["index"], inplace=True)
    # Attach region info to the covid data, then join mobility by region+date
    self.df2 = self.df2.merge(self.df3, how='inner', left_on="numero", right_on="depnum", suffixes=("", "_y"))
    self.df2 = self.df2.merge(self.df, how="outer", left_on=["Region", "date"], right_on=["polygon_name", "ds"], suffixes=("", "_y")).dropna()
    print(self.df2)
    self.df2.to_csv("/home/ludo915/code/covsco/data/train/all_data_merged/fr/Enriched_Covid_history_data.csv", index=False)
    print('OK')
    return None
def configuration(self):
    """Fixture: read-only HDX test configuration with Afghanistan and the
    State of Palestine as valid locations and offline country data."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True, user_agent='test',
                          project_config_yaml=project_config)
    Locations.set_validlocations([{'name': 'afg', 'title': 'Afghanistan'},
                                  {'name': 'pse', 'title': 'State of Palestine'}])
    Country.countriesdata(use_live=False)
def download_data():
    """Download the iso3 resources of the JHU novel-coronavirus dataset
    from HDX into the current directory, one file per resource.

    Side effects: creates the global HDX configuration if none exists and
    writes each downloaded file under the resource's own name.
    """
    print('Downloading metadata...')
    try:
        Configuration.create(hdx_site='prod', user_agent='joaomarcos', hdx_read_only=True)
    except Exception:
        # Configuration.create raises if a configuration already exists —
        # reuse it. (Was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.)
        ...
    dataset = Dataset.read_from_hdx('novel-coronavirus-2019-ncov-cases')
    # Only the per-country (iso3) resources are wanted
    resources = [r for r in dataset.get_resources() if 'iso3' in r['name']]
    for resource in resources:
        print('Downloading', resource['name'] + '...')
        request.urlretrieve(resource['download_url'], resource['name'])
def __init__(self, source):
    """
    Initialising the object and HDX Configuration Connection if necessary

    :param source: desired data source, forwarded to SourceSearch
    """
    try:
        # Connect to HDX
        Configuration.create(hdx_site='prod', user_agent='Dataset_Download', hdx_read_only=True)
    except Exception:
        # Configuration.create raises when a configuration already exists.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
        print('There is already a HDX Configuration.')
    # Start HDX search based on desired data source
    self.SourceSearch(source)
def configuration(self):
    """Fixture: read-only prod-site HDX configuration with Afghanistan and
    the State of Palestine as valid locations; returns the configuration."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(hdx_read_only=True, hdx_site='prod', user_agent='test',
                          project_config_yaml=project_config)
    Locations.set_validlocations([{'name': 'afg', 'title': 'Afghanistan'},
                                  {'name': 'pse', 'title': 'State of Palestine'}])
    return Configuration.read()
def post_update(self):
    """Fixture: replace the remoteckan session with a mock whose responses
    to showcase update posts are keyed on the posted showcase title."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            datadict = json.loads(data.decode('utf-8'))
            # Delegate show/list actions to the shared mock
            if url.endswith('show') or 'list' in url:
                return mockshow(url, datadict)
            # Anything that is not an update action is a test error
            if 'update' not in url:
                return MockResponse(404, '{"success": false, "error": {"message": "TEST ERROR: Not update", "__type": "TEST ERROR: Not Update Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=ckanext_showcase_update"}')
            # Echo the update merged over the canned showcase result
            resultdictcopy = copy.deepcopy(showcase_resultdict)
            merge_two_dictionaries(resultdictcopy, datadict)
            result = json.dumps(resultdictcopy)
            # MyShowcase1: success; MyShowcase2: HTTP 404; MyShowcase3: HTTP 200 but success false
            if datadict['title'] == 'MyShowcase1':
                return MockResponse(200, '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=ckanext_showcase_update"}' % result)
            if datadict['title'] == 'MyShowcase2':
                return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=ckanext_showcase_update"}')
            if datadict['title'] == 'MyShowcase3':
                return MockResponse(200, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=ckanext_showcase_update"}')
            return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=ckanext_showcase_update"}')
    Configuration.read().remoteckan().session = MockSession()
def post_create(self):
    """Fixture: replace the remoteckan session with a mock whose responses
    to organization create posts are keyed on the posted name."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            datadict = json.loads(data.decode('utf-8'))
            # Delegate show actions to the shared organization mock
            if 'show' in url:
                return organization_mockshow(url, datadict)
            # Anything that is not a create action is a test error
            if 'create' not in url:
                return MockResponse(404, '{"success": false, "error": {"message": "TEST ERROR: Not create", "__type": "TEST ERROR: Not Create Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_create"}')
            result = json.dumps(resultdict)
            # MyOrganization1: success; MyOrganization2: HTTP 404; MyOrganization3: HTTP 200 but success false
            if datadict['name'] == 'MyOrganization1':
                return MockResponse(200, '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_create"}' % result)
            if datadict['name'] == 'MyOrganization2':
                return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_create"}')
            if datadict['name'] == 'MyOrganization3':
                return MockResponse(200, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_create"}')
            return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_create"}')
    Configuration.read().remoteckan().session = MockSession()
def main(excel_path, gsheet_auth, updatesheets, updatetabs, scrapers, basic_auths, nojson, **ignore):
    """Run the covid-viz scrapers and write the selected tabs to the
    configured outputs (Excel, Google Sheets and/or JSON).

    :param excel_path: path of Excel output file, or falsy to skip Excel
    :param gsheet_auth: Google Sheets credentials, or falsy to skip Sheets
    :param updatesheets: which spreadsheets to update
    :param updatetabs: tabs to update; None means all configured tabs
    :param scrapers: restrict the run to these scrapers, or falsy for all
    :param basic_auths: per-source basic auth credentials
    :param nojson: when truthy, skip the JSON output
    """
    logger.info('##### hdx-scraper-covid-viz version %.1f ####' % VERSION)
    configuration = Configuration.read()
    with Download(rate_limit={'calls': 1, 'period': 0.1}) as downloader:
        if scrapers:
            logger.info('Updating only scrapers: %s' % scrapers)
        tabs = configuration['tabs']
        if updatetabs is None:
            updatetabs = list(tabs.keys())
            logger.info('Updating all tabs')
        else:
            logger.info('Updating only these tabs: %s' % updatetabs)
        # noout is a no-op sink used for any output channel that is disabled
        noout = nooutput(updatetabs)
        if excel_path:
            excelout = exceloutput(excel_path, tabs, updatetabs)
        else:
            excelout = noout
        if gsheet_auth:
            gsheets = googlesheets(configuration, gsheet_auth, updatesheets, tabs, updatetabs)
        else:
            gsheets = noout
        if nojson:
            jsonout = noout
        else:
            jsonout = jsonoutput(configuration, updatetabs)
        outputs = {'gsheets': gsheets, 'excel': excelout, 'json': jsonout}
        admininfo = AdminInfo.setup(downloader)
        get_indicators(configuration, downloader, admininfo, outputs, updatetabs, scrapers, basic_auths)
        # Flush the outputs that buffer their writes
        excelout.save()
        jsonout.add_additional_json(downloader)
        jsonout.save(hrp_iso3s=admininfo.hrp_iso3s)
def post_delete(self):
    """Fixture: replace the remoteckan session with a mock that accepts a
    delete post only for the one known organization id."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            decodedata = data.decode('utf-8')
            datadict = json.loads(decodedata)
            # Delegate show actions to the shared organization mock
            if 'show' in url:
                return organization_mockshow(url, datadict)
            # Anything that is not a delete action is a test error
            if 'delete' not in url:
                return MockResponse(
                    404,
                    '{"success": false, "error": {"message": "TEST ERROR: Not delete", "__type": "TEST ERROR: Not Delete Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_delete"}'
                )
            # Deleting the known organization id succeeds, echoing the payload
            if datadict['id'] == 'b67e6c74-c185-4f43-b561-0e114a736f19':
                return MockResponse(
                    200,
                    '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_delete"}' % decodedata)
            return MockResponse(
                404,
                '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_delete"}'
            )
    Configuration.read().remoteckan().session = MockSession()
def post_resourceview(self):
    """Fixture: route remoteckan posts straight to the resource-view mock."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            return mockresourceview(url, data.decode('utf-8'))
    Configuration.read().remoteckan().session = MockSession()
def read(self):
    """Fixture: route remoteckan posts to the organization show mock."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            return organization_mockshow(url, json.loads(data.decode('utf-8')))
    Configuration.read().remoteckan().session = MockSession()
def main():
    """Generate dataset and create it in HDX"""
    # Read all project configuration values up front
    configuration = Configuration.read()
    acled_url = configuration['acled_url']
    countries_url = configuration['countries_url']
    hxlproxy_url = configuration['hxlproxy_url']
    with Download() as downloader:
        countriesdata = get_countriesdata(countries_url, downloader)
        logger.info('Number of datasets to upload: %d' % len(countriesdata))
        for countrydata in sorted(countriesdata, key=lambda country: country['iso3']):
            dataset, showcase = generate_dataset_and_showcase(acled_url, hxlproxy_url,
                                                              downloader, countrydata)
            if not dataset:
                continue
            dataset.update_from_yaml()
            dataset.create_in_hdx()
            resource_view = generate_resource_view(dataset)
            resource_view.create_in_hdx()
            showcase.create_in_hdx()
            showcase.add_dataset(dataset)
            # Throttle uploads to avoid hammering the API
            sleep(1)
def test_generate_dataset_and_showcase(self, configuration, downloader):
    """Dataset/resource/showcase generation for a known country, and the
    None result for a country with no data."""
    hxlproxy_url = Configuration.read()['hxlproxy_url']
    dataset, showcase = generate_dataset_and_showcase('http://lala?', hxlproxy_url, downloader, TestAcledAfrica.countrydata)
    assert dataset == TestAcledAfrica.dataset
    resources = dataset.get_resources()
    assert resources == [TestAcledAfrica.resource]
    assert showcase == {'name': 'acled-data-for-cameroon-showcase',
                        'notes': 'Conflict Data Dashboard for Cameroon',
                        'url': 'https://www.acleddata.com/dashboard/#120',
                        'tags': [{'name': 'HXL'}, {'name': 'conflicts'},
                                 {'name': 'political violence'}, {'name': 'protests'}],
                        'title': 'Dashboard for Cameroon',
                        'image_url': 'https://www.acleddata.com/wp-content/uploads/2018/01/dash.png'}
    # A country without ACLED data yields no dataset
    dataset, showcase = generate_dataset_and_showcase('http://lala?', hxlproxy_url, downloader,
                                                      {'m49': 4, 'iso3': 'AFG', 'countryname': 'Afghanistan'})
    assert dataset is None
def facade(projectmainfn, **kwargs):
    # (Callable[[None], None], Any) -> None
    """Facade to simplify project setup that calls project main function

    Args:
        projectmainfn ((None) -> None): main function of project
        **kwargs: configuration parameters to pass to HDX Configuration class

    Returns:
        None
    """
    # Create the global HDX configuration; _create returns the site url
    hdx_site_url = Configuration._create(**kwargs)
    logger.info('--------------------------------------------------')
    logger.info('> Using HDX Python API Library %s' % Configuration.apiversion)
    logger.info('> HDX Site: %s' % hdx_site_url)
    # Propagate the configured user agent, then hand over to the project
    UserAgent.user_agent = Configuration.read().user_agent
    projectmainfn()
def tagscleanupdicts(configuration=None, url=None, keycolumn=5, failchained=True):
    # type: (Optional[Configuration], Optional[str], int, bool) -> Tuple[Dict,List]
    """
    Get tags cleanup dictionaries

    Args:
        configuration (Optional[Configuration]): HDX configuration. Defaults to global configuration.
        url (Optional[str]): Url of tags cleanup spreadsheet. Defaults to None (internal configuration parameter).
        keycolumn (int): Column number of tag column in spreadsheet. Defaults to 5.
        failchained (bool): Fail if chained rules found. Defaults to True.

    Returns:
        Tuple[Dict,List]: Returns (Tags dictionary, Wildcard tags list)
    """
    # Lazily populate the class-level cache on first call
    if not Tags._tags_dict:
        if configuration is None:
            configuration = Configuration.read()
        with Download(full_agent=configuration.get_user_agent()) as downloader:
            if url is None:
                url = configuration['tags_cleanup_url']
            Tags._tags_dict = downloader.download_tabular_rows_as_dicts(url, keycolumn=keycolumn)
            keys = Tags._tags_dict.keys()
            chainerror = False
            # Detect chained rules: a rule whose replacement tag is itself
            # rewritten by another (non-OK/Other) rule
            for i, tag in enumerate(keys):
                whattodo = Tags._tags_dict[tag]
                action = whattodo[u'action']
                final_tags = whattodo[u'final tags (semicolon separated)']
                for final_tag in final_tags.split(';'):
                    if final_tag in keys:
                        index = list(keys).index(final_tag)
                        # A tag may map to itself without being a chain
                        if index != i:
                            whattodo2 = Tags._tags_dict[final_tag]
                            action2 = whattodo2[u'action']
                            if action2 != 'OK' and action2 != 'Other':
                                final_tags2 = whattodo2[u'final tags (semicolon separated)']
                                if final_tag not in final_tags2.split(';'):
                                    chainerror = True
                                    if failchained:
                                        logger.error('Chained rules: %s (%s -> %s) | %s (%s -> %s)' %
                                                     (action, tag, final_tags, action2, final_tag, final_tags2))
            if failchained and chainerror:
                raise ChainRuleError('Chained rules for tags detected!')
            # Collect wildcard rules separately for pattern matching
            Tags._wildcard_tags = list()
            for tag in Tags._tags_dict:
                if '*' in tag:
                    Tags._wildcard_tags.append(tag)
    return Tags._tags_dict, Tags._wildcard_tags
def read(self):
    """Install a mock CKAN session handling showcase package association endpoints."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            payload = json.loads(data.decode('utf-8'))
            # Record which association action was invoked so the test can assert on it
            if 'association_delete' in url:
                TestShowcase.association = 'delete'
                return MockResponse(200, '{"success": true, "result": null, "help": "http://test-data.humdata.org/api/3/action/help_show?name=ckanext_showcase_package_association_delete"}')
            if 'association_create' in url:
                TestShowcase.association = 'create'
                echoed = json.dumps(payload)
                return MockResponse(200, '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=ckanext_showcase_package_association_create"}' % echoed)
            # Any other call is a read handled by the shared show mock
            return mockshow(url, payload)
    Configuration.read().remoteckan().session = MockSession()
def test_read_from_hdx(self, configuration, read, mocksmtp):
    """Read users from the mocked HDX API and check emailing one builds the expected SMTP message."""
    # Known id resolves to a populated user dictionary
    user = User.read_from_hdx('9f3e9973-7dbe-4c65-8820-f48578e3ffea')
    assert user['id'] == '9f3e9973-7dbe-4c65-8820-f48578e3ffea'
    assert user['name'] == 'MyUser1'
    # Unknown identifiers return None rather than raising
    user = User.read_from_hdx('TEST2')
    assert user is None
    user = User.read_from_hdx('TEST3')
    assert user is None
    # Configure the (mocked by mocksmtp) emailer and send a multipart email to the user
    config = Configuration.read()
    config.setup_emailer(email_config_dict=TestUser.email_config_dict)
    user = User.read_from_hdx('9f3e9973-7dbe-4c65-8820-f48578e3ffea')
    user.email(TestUser.subject, TestUser.text_body, html_body=TestUser.html_body, sender=TestUser.sender, mail_options=TestUser.mail_options, rcpt_options=TestUser.rcpt_options)
    # Inspect the captured SMTP server state recorded by the mock
    email = config.emailer()
    assert email.server.type == 'smtpssl'
    assert email.server.initargs == TestUser.smtp_initargs
    assert email.server.username == TestUser.username
    assert email.server.password == TestUser.password
    assert email.server.sender == TestUser.sender
    assert email.server.recipients == ['*****@*****.**']
    # Message must be multipart with the expected headers and both alternatives
    assert 'Content-Type: multipart/alternative;' in email.server.msg
    assert '''\
MIME-Version: 1.0
Subject: hello
From: [email protected]
To: [email protected]''' in email.server.msg
    # Plain-text alternative
    assert '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello there''' in email.server.msg
    # HTML alternative
    assert '''\
Content-Type: text/html; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

<html>
  <head></head>
  <body>
    <p>Hi!<br>
    </p>
  </body>
</html>''' in email.server.msg
    # Mail/recipient options must be forwarded to the send call unchanged
    assert email.server.send_args == {'mail_options': ['a', 'b'], 'rcpt_options': [1, 2]}
def post_delete(self):
    """Install a mock CKAN session emulating the organization delete endpoint."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            raw = data.decode('utf-8')
            params = json.loads(raw)
            # Reads go to the shared organization show mock
            if 'show' in url:
                return organization_mockshow(url, params)
            # Anything other than a delete call is an error in these tests
            if 'delete' not in url:
                return MockResponse(404, '{"success": false, "error": {"message": "TEST ERROR: Not delete", "__type": "TEST ERROR: Not Delete Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_delete"}')
            # Only the known organization id deletes successfully; the request body is echoed back
            if params['id'] == 'b67e6c74-c185-4f43-b561-0e114a736f19':
                return MockResponse(200, '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_delete"}' % raw)
            return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_delete"}')
    Configuration.read().remoteckan().session = MockSession()
def post_delete(self):
    """Install a mock CKAN session emulating the showcase delete endpoint."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            raw = data.decode('utf-8')
            params = json.loads(raw)
            # Show and list requests are served by the shared mock
            if url.endswith('show') or 'list' in url:
                return mockshow(url, params)
            # Anything other than a delete call is an error in these tests
            if 'delete' not in url:
                return MockResponse(404, '{"success": false, "error": {"message": "TEST ERROR: Not delete", "__type": "TEST ERROR: Not Delete Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=ckanext_showcase_delete"}')
            # Only the known showcase id deletes successfully; the request body is echoed back
            if params['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746':
                return MockResponse(200, '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=ckanext_showcase_delete"}' % raw)
            return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=ckanext_showcase_delete"}')
    Configuration.read().remoteckan().session = MockSession()
def test_get_all_users(self, configuration, post_list, mocksmtp):
    """List users from the mocked HDX API and check bulk-emailing them builds the expected SMTP message."""
    user = User.get_all_users()
    assert len(users) == 2
    # Configure the (mocked by mocksmtp) emailer and email all listed users at once
    config = Configuration.read()
    config.setup_emailer(email_config_dict=TestUser.email_config_dict)
    User.email_users(users, TestUser.subject, TestUser.text_body, html_body=TestUser.html_body, sender=TestUser.sender, mail_options=TestUser.mail_options, rcpt_options=TestUser.rcpt_options)
    # Inspect the captured SMTP server state recorded by the mock
    email = config.emailer()
    assert email.server.type == 'smtpssl'
    assert email.server.initargs == TestUser.smtp_initargs
    assert email.server.username == TestUser.username
    assert email.server.password == TestUser.password
    assert email.server.sender == TestUser.sender
    # Both users' addresses should be in the recipient list
    assert email.server.recipients == ['*****@*****.**', '*****@*****.**']
    # Message must be multipart with the expected headers and both alternatives
    assert 'Content-Type: multipart/alternative;' in email.server.msg
    assert '''\
MIME-Version: 1.0
Subject: hello
From: [email protected]
To: [email protected], [email protected]''' in email.server.msg
    # Plain-text alternative
    assert '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello there''' in email.server.msg
    # HTML alternative
    assert '''\
Content-Type: text/html; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

<html>
  <head></head>
  <body>
    <p>Hi!<br>
    </p>
  </body>
</html>''' in email.server.msg
    # Mail/recipient options must be forwarded to the send call unchanged
    assert email.server.send_args == {'mail_options': ['a', 'b'], 'rcpt_options': [1, 2]}
    # Emailing an empty user list is rejected
    with pytest.raises(ValueError):
        User.email_users(list(), TestUser.subject, TestUser.text_body, sender=TestUser.sender, mail_options=TestUser.mail_options, rcpt_options=TestUser.rcpt_options)
def post_listorgs(self):
    """Install a mock CKAN session serving user show/list and organization show calls."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            body = json.loads(data.decode('utf-8'))
            if 'user' in url:
                if 'show' in url:
                    return user_mockshow(url, body)
                if 'list' in url:
                    return MockResponse(200, '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_list"}' % json.dumps(orglist))
            elif 'organization' in url and 'show' in url:
                # Only the two known identifiers resolve to the fixture organization
                if body['id'] in ('b67e6c74-c185-4f43-b561-0e114a736f19', 'TEST1'):
                    return MockResponse(200, '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=organization_show"}' % json.dumps(orgdict))
            # Unmatched requests fall through and implicitly return None
    Configuration.read().remoteckan().session = MockSession()
def post_update(self):
    """Install a mock CKAN session emulating resource update (including file upload) and datastore create."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            # Multipart/urlencoded uploads arrive as a dict of bytes; JSON calls as encoded bytes
            if isinstance(data, dict):
                datadict = {k.decode('utf8'): v.decode('utf8') for k, v in data.items()}
            else:
                datadict = json.loads(data.decode('utf-8'))
            # Reads are served by the shared show mock
            if 'show' in url:
                return mockshow(url, datadict)
            # Datastore create for the known resource id returns the fixture field schema
            if 'resource_id' in datadict:
                if datadict['resource_id'] == '74b74ae1-df0c-4716-829f-4f939a046811':
                    return MockResponse(200, '{"success": true, "result": {"fields": [{"type": "text", "id": "code"}, {"type": "text", "id": "title"}, {"type": "float", "id": "value"}, {"type": "timestamp", "id": "latest_date"}, {"type": "text", "id": "source"}, {"type": "text", "id": "source_link"}, {"type": "text", "id": "notes"}, {"type": "text", "id": "explore"}, {"type": "text", "id": "units"}], "method": "insert", "primary_key": "code", "resource_id": "bfa6b55f-10b6-4ba2-8470-33bb9a5194a5"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=datastore_create"}')
            # Anything other than an update call is an error in these tests
            if 'update' not in url:
                return MockResponse(404, '{"success": false, "error": {"message": "TEST ERROR: Not update", "__type": "TEST ERROR: Not Update Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_update"}')
            if datadict['name'] == 'MyResource1':
                # Merge the submitted fields onto a deep copy of the fixture result
                resultdictcopy = copy.deepcopy(resultdict)
                merge_two_dictionaries(resultdictcopy, datadict)
                if files is not None:
                    # A file upload changes the resource to an uploaded file with a derived download url
                    resultdictcopy['url_type'] = 'upload'
                    resultdictcopy['resource_type'] = 'file.upload'
                    filename = os.path.basename(files[0][1].name)
                    resultdictcopy['url'] = 'http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/%s' % filename
                result = json.dumps(resultdictcopy)
                return MockResponse(200, '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_update"}' % result)
            # MyResource2: HTTP-level failure; MyResource3: HTTP 200 but success false in the body
            if datadict['name'] == 'MyResource2':
                return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_update"}')
            if datadict['name'] == 'MyResource3':
                return MockResponse(200, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_update"}')
            return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_update"}')
    Configuration.read().remoteckan().session = MockSession()
def post_datastore(self):
    """Install a mock CKAN session emulating datastore create/insert/upsert/delete/search actions."""
    class MockSession(object):
        @staticmethod
        def post(url, data, headers, files, allow_redirects, auth=None):
            decodedata = data.decode('utf-8')
            datadict = json.loads(decodedata)
            # Reads are served by the shared show mock
            if 'show' in url:
                return mockshow(url, datadict)
            # Only datastore actions are valid in these tests
            if 'create' not in url and 'insert' not in url and 'upsert' not in url and 'delete' not in url and 'search' not in url:
                return MockResponse(404, '{"success": false, "error": {"message": "TEST ERROR: Not create or delete", "__type": "TEST ERROR: Not Create or Delete Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=datastore_action"}')
            # Unknown resources fail for delete, create and search
            if 'delete' in url and datadict['resource_id'] == 'datastore_unknown_resource':
                return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=datastore_delete"}')
            # Successful delete records the action so tests can assert on it
            if 'delete' in url and datadict['resource_id'] == 'de6549d8-268b-4dfe-adaf-a4ae5c8510d5':
                TestResource.datastore = 'delete'
                return MockResponse(200, '{"success": true, "result": {"resource_id": "de6549d8-268b-4dfe-adaf-a4ae5c8510d5"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_delete"}')
            if 'create' in url and datadict['resource_id'] == 'datastore_unknown_resource':
                return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=datastore_create"}')
            if 'search' in url and datadict['resource_id'] == 'datastore_unknown_resource':
                return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=datastore_create"}')
            # Searching the special _table_metadata resource returns a fixture listing of datastore tables
            if 'search' in url and datadict['resource_id'] == '_table_metadata':
                return MockResponse(200, '{"success": true, "result": {"include_total": true, "resource_id": "_table_metadata", "fields": [{"type": "int", "id": "_id"}, {"type": "name", "id": "name"}, {"type": "oid", "id": "oid"}, {"type": "name", "id": "alias_of"}], "records_format": "objects", "records": [{"_id":"f9cd60f3d7f2f6d0","name":"f9228459-d808-4b51-948f-68a5850abfde","oid":"919290","alias_of":null},{"_id":"7ae63490de9b7d7b","name":"af618a0b-09b8-42c8-836f-2be597e1ea34","oid":"135294","alias_of":null},{"_id":"1dc37f4e89988644","name":"748b40dd-7bd3-40a3-941b-e76f0bfbe0eb","oid":"117144","alias_of":null},{"_id":"2a554a61bd366206","name":"91c78d24-eab3-40b5-ba91-6b29bcda7178","oid":"116963","alias_of":null},{"_id":"fd787575143afe90","name":"9320cfce-4620-489a-bcbe-25c73867d4fc","oid":"107430","alias_of":null},{"_id":"a70093abd230f647","name":"b9d2eb36-e65c-417a-bc28-f4dadb149302","oid":"107409","alias_of":null},{"_id":"95fbdd2d06c07aea","name":"ca6a0891-8395-4d58-9168-6c44e17e0193","oid":"107385","alias_of":null}], "limit": 10000, "_links": {"start": "/api/action/datastore_search?limit=10000&resource_id=_table_metadata", "next": "/api/action/datastore_search?offset=10000&limit=10000&resource_id=_table_metadata"}, "total": 7}}')
            # Create/insert/upsert/search on the known resource succeeds and records the action
            if ('create' in url or 'insert' in url or 'upsert' in url or 'search' in url) and datadict['resource_id'] == 'de6549d8-268b-4dfe-adaf-a4ae5c8510d5':
                TestResource.datastore = 'create'
                return MockResponse(200, '{"success": true, "result": {"fields": [{"type": "text", "id": "code"}, {"type": "text", "id": "title"}, {"type": "float", "id": "value"}, {"type": "timestamp", "id": "latest_date"}, {"type": "text", "id": "source"}, {"type": "text", "id": "source_link"}, {"type": "text", "id": "notes"}, {"type": "text", "id": "explore"}, {"type": "text", "id": "units"}], "method": "insert", "primary_key": "code", "resource_id": "bfa6b55f-10b6-4ba2-8470-33bb9a5194a5"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=datastore_create"}')
            return MockResponse(404, '{"success": false, "error": {"message": "Not found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=resource_delete"}')
    Configuration.read().remoteckan().session = MockSession()
def my_testfn():
    """Record the configured HDX site URL on the shared test result object."""
    configuration = Configuration.read()
    testresult.actual_result = configuration.get_hdx_site_url()
def configuration(hdx_config_yaml, project_config_yaml):
    """Create the global HDX configuration from the supplied YAML fixture paths."""
    create_args = {
        'user_agent': 'test',
        'hdx_config_yaml': hdx_config_yaml,
        'project_config_yaml': project_config_yaml,
    }
    Configuration._create(**create_args)
def my_testkeyfn():
    """Record the configured HDX API key on the shared test result object."""
    configuration = Configuration.read()
    testresult.actual_result = configuration.get_api_key()
def my_testuafn():
    """Record the configured user agent on the shared test result object."""
    configuration = Configuration.read()
    testresult.actual_result = configuration.user_agent
def my_excfn():
    """Record the HDX site URL, then raise to exercise facade error handling."""
    configuration = Configuration.read()
    testresult.actual_result = configuration.get_hdx_site_url()
    raise ValueError('Some failure!')
def configuration(self):
    """Set up HDX configuration, fixed valid locations and offline country data for tests."""
    project_config = join('tests', 'config', 'project_configuration.yml')
    Configuration._create(user_agent='test', hdx_key='12345', project_config_yaml=project_config)
    # Pin the valid location list so tests do not depend on live HDX data
    fixture_locations = [
        {'name': 'afg', 'title': 'Afghanistan'},
        {'name': 'cmr', 'title': 'Cameroon'},
    ]
    Locations.set_validlocations(fixture_locations)
    Country.countriesdata(use_live=False)