def test_search_in_hdx(self, configuration, search):
    datasets = Dataset.search_in_hdx(configuration, 'ACLED')
    assert len(datasets) == 10
    datasets = Dataset.search_in_hdx(configuration, 'ajyhgr')
    assert len(datasets) == 0
    with pytest.raises(HDXError):
        Dataset.search_in_hdx(configuration, '"')

def compare_dates(csvname, urlend, filename, docname, keyword):
    # Relies on the module-level globals d1 and d2 (dataset dates) being set
    # elsewhere, e.g. d2 by get_new_date() below
    dataset = Dataset.read_from_hdx(urlend)
    datasets = Dataset.search_in_hdx(docname, rows=10)
    resources = Dataset.get_all_resources(datasets)
    if d2 > d1:
        url, path = resources[0].download(
            '/Users/katherinenewcomb/Desktop/TestingRepo')
        print('Resource URL %s downloaded to %s' % (url, path))
        with open('/Users/katherinenewcomb/Desktop/TestingRepo/{}'.format(csvname),
                  "w+") as f:
            f.write(dataset.get_dataset_date())
        shutil.move(
            '/Users/katherinenewcomb/Desktop/TestingRepo/{}'.format(filename),
            '/Users/katherinenewcomb/Desktop/TestingRepo/ArchiveData/{}'.format(filename))
        newfile = glob.glob(
            '/Users/katherinenewcomb/Desktop/TestingRepo/*{}*'.format(keyword))
        os.rename(
            glob.glob('/Users/katherinenewcomb/Desktop/TestingRepo/*{}*'.format(
                'poverty-statistic'))[0],
            '/Users/katherinenewcomb/Desktop/TestingRepo/' + filename)
        # print(newfile)
    else:
        newfile = "No new file"
    print(newfile)
    print('System Update Complete')
    return newfile

def datasets(configuration):
    ds = list()
    dataset = Dataset({
        'title': 'UNHCR Refugee Population Statistics',
        'dataset_date': '12/31/2013'
    })
    ds.append(dataset)
    dataset = Dataset({
        'title': 'UNHCR Global Trends: Forced Displacement in 2016 Data',
        'dataset_date': '06/20/2017'
    })
    dataset.add_update_resource(
        {'url': join('tests', 'fixtures', 'UNHCR-14-wrd-tab-v3-external.xls')})
    ds.append(dataset)
    dataset = Dataset({
        'title': 'Global Forced Displacement Trends in 2014',
        'dataset_date': '06/19/2015'
    })
    ds.append(dataset)
    dataset = Dataset({
        'title': 'UNHCR Population of Concern from Colombia',
        'dataset_date': '01/01/1975-12/01/2012'
    })
    ds.append(dataset)
    return ds

def test_generate_resource_view(self):
    dataset = Dataset(TestAcledAfrica.dataset)
    resource = copy.deepcopy(TestAcledAfrica.resource)
    resource['id'] = '123'
    dataset.add_update_resource(resource)
    result = generate_resource_view(dataset)
    assert result == {
        'resource_id': '123',
        'description': '',
        'title': 'Quick Charts',
        'view_type': 'hdx_hxl_preview',
        'hxl_preview_config': '{"configVersion":4,"bites":[{"tempShowSaveCancelButtons":false,"ingredient":{"aggregateColumn":null,"valueColumn":"#affected+killed","aggregateFunction":"sum","dateColumn":null,"comparisonValueColumn":null,"comparisonOperator":null,"filters":{},"description":""},"type":"key figure","errorMsg":null,"computedProperties":{"title":"Sum of fatalities","dataTitle":"fatalities","unit":"k"},"uiProperties":{"internalColorPattern":["#1ebfb3","#0077ce","#f2645a","#9C27B0"],"title":"Total Fatalities","postText":"deaths","numberFormat":"us","unit":"none"},"dataProperties":{},"displayCategory":"Key Figures","hashCode":-1955043658},{"tempShowSaveCancelButtons":false,"ingredient":{"aggregateColumn":"#adm1+name","valueColumn":"#affected+killed","aggregateFunction":"sum","dateColumn":null,"comparisonValueColumn":null,"comparisonOperator":null,"filters":{},"description":""},"type":"chart","errorMsg":null,"computedProperties":{"pieChart":false,"title":"Sum of fatalities by admin1","dataTitle":"fatalities"},"uiProperties":{"swapAxis":true,"showGrid":false,"color":"#1ebfb3","sortingByValue1":"DESC","sortingByCategory1":null,"internalColorPattern":["#1ebfb3","#0077ce","#f2645a","#9C27B0"],"title":"Top 5 Regions for Fatalities","dataTitle":"deaths","limit":5},"dataProperties":{},"displayCategory":"Charts","hashCode":738289179},{"tempShowSaveCancelButtons":false,"ingredient":{"aggregateColumn":null,"valueColumn":"#affected+killed","aggregateFunction":"sum","dateColumn":"#date+occurred","comparisonValueColumn":null,"comparisonOperator":null,"filters":{},"description":""},"type":"timeseries","errorMsg":null,"computedProperties":{"pieChart":false,"filters":{"filterWith":[{"#affected+killed":"is not empty"}],"filterWithout":[]},"title":"Sum of fatalities by event_date","dataTitle":"fatalities"},"uiProperties":{"swapAxis":true,"showGrid":true,"color":"#0077ce","sortingByValue1":"DESC","sortingByCategory1":null,"showPoints":false,"internalColorPattern":["#1ebfb3","#0077ce","#f2645a","#9C27B0"],"title":"Fatalities over Time","dataTitle":"deaths"},"dataProperties":{},"displayCategory":"Timeseries","hashCode":2126517972}],"cookbookName":"generic"}'}

def pop_data_download(region_names, wp_year=2017):
    import os
    import shutil
    import zipfile
    from pathlib import Path
    from hdx.utilities.easy_logging import setup_logging
    setup_logging()
    from hdx.hdx_configuration import Configuration
    Configuration.create(hdx_site='prod', user_agent='Read-only user',
                         hdx_read_only=True)
    from hdx.data.dataset import Dataset
    import wpgpDownload
    from wpgpDownload.utils.convenience_functions import download_country_covariates as download_worldpop
    from wpgpDownload.utils.convenience_functions import refresh_csv
    refresh_csv()
    hdx_datasets = Dataset.search_in_hdx('hrsl', rows=500)
    hdx_resources = Dataset.get_all_resources(hdx_datasets)
    print('')
    # Region names are expected to start with an ISO3 country code
    country_names = set([region[0:3] for region in region_names])
    for country in country_names:
        print(country)
        for res in hdx_resources:
            if ('population_' + country.lower() in res['name']
                    and '.zip' in res['name'] and 'csv' not in res['name']):
                print('Downloading HRSL', res['name'], end='\r')
                url, path = res.download()
                print('HRSL', res['name'], 'download completed ')
                shutil.move(Path(path), Path('./' + country + '/misc_data/population_' + country.lower() + '.zip'))
                zipfile.ZipFile(Path('./' + country + '/misc_data/population_' + country.lower() + '.zip'), 'r').extractall(Path('./' + country + '/misc_data'))
                for file in Path('./' + country + '/misc_data').iterdir():
                    if 'population_' + country.lower() in file.name and file.suffix != '.tif':
                        os.remove(file)
        if isinstance(wp_year, list):
            years = wp_year
        elif isinstance(wp_year, int):
            years = [wp_year]
        # NTL_files = [file for file in Path("./"+country+"/NTL").iterdir() if "NTL" in file.name]
        #
        # years = []
        # for NTL_file in NTL_files:
        #     years.append(NTL_file.name[4:8])
        # years = [year for year in set(years)]
        # years.sort()
        for year in years:
            print('Downloading WorldPop ' + country + ' ' + str(year) + '\t\t', end='\r')
            download_worldpop(ISO=country, out_folder='.\\' + country + '\\worldpop',
                              prod_name='ppp_' + str(year))
            print('WorldPop ' + country + ' ' + str(year) + ' download completed\t\t')
        print("")
    print('Done')

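# A hedged usage sketch for pop_data_download above: the region names are
# hypothetical placeholders. Region names must start with an ISO3 country code,
# since the function derives the country set via region[0:3]; wp_year may be a
# single int or a list of years.
def example_pop_data_download():
    # HRSL and WorldPop rasters are fetched once per distinct ISO3 prefix
    # (here KEN and UGA), for each requested WorldPop year
    pop_data_download(['KEN_nairobi', 'KEN_mombasa', 'UGA_kampala'],
                      wp_year=[2017, 2018])
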
def test_read_from_hdx(self, configuration, read):
    dataset = Dataset.read_from_hdx(configuration, 'TEST1')
    assert dataset['id'] == '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'
    assert dataset['name'] == 'MyDataset1'
    assert dataset['dataset_date'] == '06/04/2016'
    assert len(dataset.resources) == 2
    assert len(dataset.gallery) == 1
    dataset = Dataset.read_from_hdx(configuration, 'TEST2')
    assert dataset is None
    dataset = Dataset.read_from_hdx(configuration, 'TEST3')
    assert dataset is None

def test_generate_resource_view(self):
    dataset = Dataset(TestDHS.dataset)
    resource = copy.deepcopy(TestDHS.resources[0])
    resource['id'] = '123'
    resource['url'] = 'https://test-data.humdata.org/dataset/495bf9ef-afab-41ac-a804-ca5978aa4213/resource/703d04ef-1787-44b1-92d5-c4ddd283d33f/download/dhs-quickstats_national_afg.csv'
    dataset.add_update_resource(resource)
    resource_view = generate_resource_view(dataset, bites_disabled=[True, True, True])
    assert resource_view is None
    resource_view = generate_resource_view(dataset, bites_disabled=[False, True, False])
    assert resource_view == {
        'resource_id': '123',
        'description': '',
        'title': 'Quick Charts',
        'view_type': 'hdx_hxl_preview',
        'hxl_preview_config': '{"configVersion": 5, "bites": [{"tempShowSaveCancelButtons": false, "ingredient": {"valueColumn": "#indicator+value+num", "aggregateFunction": "sum", "dateColumn": null, "comparisonValueColumn": null, "comparisonOperator": null, "filters": {"filterWith": [{"#date+year": "$MAX$"}, {"#indicator+code": "CM_ECMR_C_IMR"}, {"#indicator+label+code": "14003"}]}, "title": "Infant Mortality Rate", "description": "Rate is for the period of 10 years preceding the survey"}, "type": "key figure", "errorMsg": null, "computedProperties": {"explainedFiltersMap": {}, "pieChart": false, "dataTitle": "Value"}, "uiProperties": {"swapAxis": true, "showGrid": true, "color": "#1ebfb3", "sortingByValue1": "DESC", "sortingByCategory1": null, "internalColorPattern": ["#1ebfb3", "#0077ce", "#f2645a", "#9C27B0"], "dataTitle": "Percent", "postText": "percent"}, "dataProperties": {}, "displayCategory": "Charts", "hashCode": -487125335}, {"tempShowSaveCancelButtons": false, "ingredient": {"valueColumn": "#indicator+value+num", "aggregateFunction": "sum", "dateColumn": null, "comparisonValueColumn": null, "comparisonOperator": null, "filters": {"filterWith": [{"#date+year": "$MAX$"}, {"#indicator+code": "ED_LITR_W_LIT"}]}, "title": "Women who are Literate", "description": ""}, "type": "key figure", "errorMsg": null, "computedProperties": {"explainedFiltersMap": {}, "pieChart": false, "dataTitle": "Value"}, "uiProperties": {"swapAxis": true, "showGrid": true, "color": "#1ebfb3", "sortingByValue1": "ASC", "sortingByCategory1": null, "internalColorPattern": ["#1ebfb3", "#0077ce", "#f2645a", "#9C27B0"], "dataTitle": "Percent", "postText": "percent"}, "dataProperties": {}, "displayCategory": "Charts", "hashCode": -539301812}], "recipeUrl": "https://raw.githubusercontent.com/mcarans/hxl-recipes/dev/recipes/dhs/recipe.json"}'}

def construct_dataset(origdata, origresources, maintainer=None, orgid=None,
                      organization=None):
    dataset = Dataset(copy.deepcopy(origdata))
    if maintainer:
        dataset['maintainer'] = maintainer
    if orgid:
        dataset['owner_org'] = orgid
    if organization:
        dataset['organization'] = organization
    dataset.add_update_resources(copy.deepcopy(origresources))
    return dataset

def datasets(self):  # noqa
    if self._datasets:
        return self._datasets
    self._datasets = {}
    for theme in self._feature_selection.themes:
        dataset = Dataset()
        name = '{}_{}'.format(self._dataset_prefix, theme)
        title = '{} {} (OpenStreetMap Export)'.format(self._name, theme)
        tags = []
        caveats = ''
        if 'hdx' in self._feature_selection.doc[theme]:
            hdx = self._feature_selection.doc[theme]['hdx']
            title = hdx.get('name') or title
            caveats = hdx.get('caveats', caveats)
            if 'tags' in hdx:
                tags = map(lambda tag: tag.strip(), hdx['tags'].split(','))
        dataset['name'] = name
        dataset['title'] = title
        dataset['caveats'] = caveats
        dataset['private'] = self.is_private
        dataset['notes'] = self.hdx_note(theme)
        dataset['dataset_source'] = 'OpenStreetMap contributors'
        dataset.set_dataset_date_from_datetime(self._dataset_date)
        dataset['owner_org'] = '225b9f7d-e7cb-4156-96a6-44c9c58d31e3'
        dataset['license_id'] = self._license
        dataset['methodology'] = 'Other'
        dataset['methodology_other'] = 'Volunteered geographic information'
        dataset['data_update_frequency'] = str(self._data_update_frequency)
        dataset['subnational'] = str(int(self.subnational))
        dataset['groups'] = []
        # warning: this makes a network call
        [dataset.add_other_location(x) for x in self._locations]
        dataset.add_tags(tags)
        ga = GalleryItem({
            'title': 'OSM Analytics',
            'description': 'View detailed information about OpenStreetMap edit history in this area.',
            'url': self.osm_analytics_url,
            'image_url': 'http://{}/static/ui/images/osm_analytics.png'.format(self.hostname),
            'type': 'Visualization',
        })
        dataset.add_update_galleryitem(ga)
        self._datasets[theme] = dataset
    return self._datasets

def test_add_update_delete_resources(self, configuration, post_delete):
    dataset_data = copy.deepcopy(TestDataset.dataset_data)
    resources_data = copy.deepcopy(TestDataset.resources_data)
    dataset = Dataset(dataset_data)
    dataset.add_update_resources(resources_data)
    assert len(dataset.resources) == 2
    dataset.delete_resource('NOTEXIST')
    assert len(dataset.resources) == 2
    dataset.delete_resource('de6549d8-268b-4dfe-adaf-a4ae5c8510d5')
    assert len(dataset.resources) == 1
    resources_data = copy.deepcopy(TestDataset.resources_data)
    resource = Resource(resources_data[0])
    resource.set_file_to_upload('lala')
    dataset.add_update_resource(resource)
    assert dataset.resources[1].get_file_to_upload() == 'lala'

def get_dataset_from_hdx(hdx_address: str, dataset_name: str,
                         output_filename: str):
    """
    Use the HDX API to download a dataset resource based on the dataset
    address and resource name

    :param hdx_address: The HDX address of the dataset
    :param dataset_name: The name of the resource within the dataset
    :param output_filename: The desired full filepath of the downloaded file
    """
    HDX_SITE = 'prod'
    USER_AGENT = 'MapAction'
    Configuration.create(hdx_site=HDX_SITE, user_agent=USER_AGENT,
                         hdx_read_only=True)
    logger = logging.getLogger(__name__)
    # TODO: make more generic caching ability
    # file_age_days = utils.get_file_age_days(save_filepath)
    # if 0 < file_age_days < cache_days:
    #     return save_filepath
    logger.info(f'Querying HDX API for dataset {hdx_address}')
    resources = Dataset.read_from_hdx(hdx_address).get_resources()
    for resource in resources:
        if resource['name'] == dataset_name:
            _, download_filepath = resource.download()
            copy_file(source_path=download_filepath, target_path=output_filename)
            save_file(output_filename)
            logger.info(f'Saved to {output_filename}')
            return output_filename
    raise HDXDatasetNotFound(
        f'HDX dataset with address "{hdx_address}" and name "{dataset_name}" not found'
    )

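# A hedged usage sketch for get_dataset_from_hdx: the dataset address, resource
# name and output path below are illustrative placeholders, not values from the
# original code.
def example_get_dataset_from_hdx():
    get_dataset_from_hdx(
        'movement-range-maps',           # hypothetical HDX dataset address
        'movement-range-data.zip',       # hypothetical resource name
        '/tmp/movement-range-data.zip')  # where to copy the download
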
def test_get_add_location(self, configuration, read):
    dataset = Dataset.read_from_hdx(configuration, 'TEST1')
    assert dataset['groups'] == resultgroups
    assert dataset.get_location() == ['Algeria', 'Zimbabwe']
    dataset.add_country_location('sdn')
    expected = copy.deepcopy(resultgroups)
    expected.append({'name': 'sdn'})
    assert dataset['groups'] == expected
    assert dataset.get_location() == ['Algeria', 'Zimbabwe', 'Sudan']
    dataset.add_country_location('dza')
    assert dataset['groups'] == expected
    assert dataset.get_location() == ['Algeria', 'Zimbabwe', 'Sudan']
    dataset.add_country_locations(['KEN', 'moz', 'ken'])
    expected.extend([{'name': 'ken'}, {'name': 'moz'}])
    assert dataset['groups'] == expected
    assert dataset.get_location() == [
        'Algeria', 'Zimbabwe', 'Sudan', 'Kenya', 'Mozambique'
    ]
    dataset.add_continent_location('af')
    assert len(dataset['groups']) == 58
    assert len(dataset.get_location()) == 58
    del dataset['groups']
    assert dataset.get_location() == []
    with pytest.raises(HDXError):
        dataset.add_country_location('lala')
    dataset.add_country_location('ukr')
    assert dataset['groups'] == [{'name': 'ukr'}]
    assert dataset.get_location() == ['Ukraine']

def Download2Comp(self, keyword, fformat, dest):
    """
    Check the metadata of the returned datasets to see if they hold the
    data we desire, by matching each resource against a name keyword and
    a file format
    """
    # Get the data information attached to each dataset
    self.resources = Dataset.get_all_resources(self.valid_datasets)
    # Getting the relevant data attached to each dataset
    get_data = ''
    for i in range(len(self.valid_datasets)):
        # Check all data attached to each dataset
        all_data = self.valid_datasets[i].get_resources()
        for j in range(len(all_data)):
            # Take data if it matches the keyword and format desired
            if (keyword in all_data[j]['name']
                    and fformat in all_data[j]['format']):
                get_data = self.valid_datasets[i].get_resource(index=j)
                try:
                    # Download it
                    get_data['format'] = ''
                    url, path = get_data.download(folder=dest)
                    print('Resource URL %s downloaded to %s' % (url, path))
                    # Clear variable to avoid duplicate downloads in the
                    # event of failure
                    get_data = ''
                except Exception:
                    print('Data not valid for download.')

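# A hedged usage sketch for Download2Comp: it assumes self.valid_datasets has
# already been populated on the instance (e.g. by an earlier search step); the
# keyword, format and destination folder are hypothetical.
def example_download2comp(downloader):
    # Download every resource whose name contains 'admin' and whose format
    # contains 'XLSX' into /tmp
    downloader.Download2Comp('admin', 'XLSX', '/tmp')
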
def read_hdx_metadata(datasetinfo, today=None):
    # type: (Dict, Optional[datetime]) -> None
    """Read metadata from HDX dataset and add to input dictionary

    Args:
        datasetinfo (Dict): Dictionary of information about dataset
        today (Optional[datetime]): Value to use for today. Defaults to None (datetime.now()).

    Returns:
        None
    """
    dataset_name = datasetinfo['dataset']
    dataset = Dataset.read_from_hdx(dataset_name)
    url = datasetinfo.get('url')
    if not url:
        resource_name = datasetinfo.get('resource')
        format = datasetinfo['format']
        for resource in dataset.get_resources():
            if resource['format'] == format.upper():
                if resource_name and resource['name'] != resource_name:
                    continue
                url = resource['url']
                break
        if not url:
            raise ValueError('Cannot find %s resource in %s!' % (format, dataset_name))
        datasetinfo['url'] = url
    if 'date' not in datasetinfo:
        datasetinfo['date'] = get_date_from_dataset_date(dataset, today=today)
    if 'source' not in datasetinfo:
        datasetinfo['source'] = dataset['dataset_source']
    if 'source_url' not in datasetinfo:
        datasetinfo['source_url'] = dataset.get_hdx_url()

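# A minimal sketch of driving read_hdx_metadata (the dataset name below is a
# placeholder). The function fills in the 'url', 'date', 'source' and
# 'source_url' keys that are missing from the input dictionary.
def example_read_hdx_metadata():
    datasetinfo = {
        'dataset': 'novel-coronavirus-2019-ncov-cases',  # hypothetical name
        'format': 'csv',  # take the first CSV resource
    }
    read_hdx_metadata(datasetinfo)
    print(datasetinfo['url'], datasetinfo['date'], datasetinfo['source'])
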
def main():
    """Generate dataset and create it in HDX"""
    for dataset in Dataset.get_all_datasets(
            check_duplicates=False):  # [Dataset.read_from_hdx('malawi-other')]:
        changed, error = dataset.clean_dataset_tags()
        if changed and not error:
            if real_run:
                try:
                    logger.info('%s: Updating dataset in HDX' % dataset['name'])
                    dataset['batch_mode'] = 'KEEP_OLD'
                    dataset['skip_validation'] = True
                    dataset.update_in_hdx(update_resources=False, hxl_update=False)
                except HDXError as ex:
                    logger.exception(ex)
        if not dataset.get_tags():
            if dataset['private']:
                privatepublic = 'private'
            else:
                privatepublic = 'public'
            logger.warning('%s (%s) has no tags!' % (dataset['name'], privatepublic))

def delete_package_from_hdx(context, data_dict):
    check_access('package_update', context)
    id = data_dict.get('id')
    if not id:
        raise ValidationError('Dataset id is missing!')
    try:
        data = logic.get_action('package_show')({'ignore_auth': True}, {'id': id})
        hdx_dataset = Dataset.read_from_hdx(data['name'])
        if hdx_dataset:
            hdx_dataset.delete_from_hdx()
            data['hdx_name'] = ""
            try:
                toolkit.get_action('package_update')(context, data)
            except ValidationError as e:
                raise ValidationError(e.error_dict)
            return
        return "Dataset not found!"
    except Exception as e:
        log.debug(e)
        return "Please try again!"

def test_add_update_delete_gallery(self, configuration, post_delete):
    dataset_data = copy.deepcopy(TestDataset.dataset_data)
    gallery_data = copy.deepcopy(TestDataset.gallery_data)
    dataset = Dataset(configuration, dataset_data)
    dataset.add_update_gallery(gallery_data)
    assert len(dataset.gallery) == 1
    dataset.delete_galleryitem('NOTEXIST')
    dataset.delete_galleryitem('d59a01d8-e52b-4337-bcda-fceb1d059bef')
    assert len(dataset.gallery) == 0

def generate_dataset(configuration, countryName):
    # showedName = countryName
    if countryName == "Ivory Coast":
        showedName = "Cote d'Ivoire"  # note: currently unused
    name = countryName + '-healthsites'
    title = countryName + '-healthsites'
    slugified_name = slugify(name).lower()
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    # generate the country data files
    getCountryHealthSites(configuration, countryName)
    # geojson resource
    if os.path.isfile(configuration.read()['data_folder'] + countryName + '.geojson'):
        rName = countryName + '-healthsites-geojson'
        geojsonResource = Resource()
        geojsonResource['name'] = rName
        geojsonResource['format'] = 'geojson'
        geojsonResource['url'] = configuration.read()['base_url']
        geojsonResource['description'] = countryName + ' healthsites geojson'
        geojsonResource.set_file_to_upload(
            configuration.read()['data_folder'] + countryName + '.geojson')
        geojsonResource.check_required_fields(['group', 'package_id'])
        dataset.add_update_resource(geojsonResource)
    # csv resource
    if os.path.isfile(configuration.read()['data_folder'] + countryName + '.csv'):
        resource_csv = Resource()
        resource_csv['name'] = countryName + '-healthsites-csv'
        resource_csv['description'] = countryName + ' healthsites csv'
        resource_csv['format'] = 'csv'
        resource_csv.set_file_to_upload(
            configuration.read()['data_folder'] + countryName + '.csv')
        resource_csv.check_required_fields(['group', 'package_id'])
        dataset.add_update_resource(resource_csv)
    # shp resource
    if os.path.isfile(configuration.read()['data_folder'] + countryName + "-shapefiles.zip"):
        resource_shp = Resource()
        resource_shp['name'] = countryName + '-healthsites-shp'
        resource_shp['format'] = 'zipped shapefile'
        resource_shp['description'] = countryName + ' healthsites shapefiles'
        resource_shp.set_file_to_upload(
            configuration.read()['data_folder'] + countryName + "-shapefiles.zip")
        resource_shp.check_required_fields(['group', 'package_id'])
        dataset.add_update_resource(resource_shp)
    return dataset

def get_new_date(urlend, docname):
    # Gets specific url for indicated category
    Configuration.create(hdx_site='prod', user_agent='A_Quick_Example',
                         hdx_read_only=True)
    dataset = Dataset.read_from_hdx(urlend)
    datasets = Dataset.search_in_hdx(docname, rows=10)
    resources = Dataset.get_all_resources(datasets)
    # Creates variable for most updated version of dataset date
    y = dataset.get_dataset_date()
    # Gets year, month, and day of dataset
    year1 = y[:4]
    month1 = y[5:7]
    day1 = y[8:10]
    # Organizes dataset date into datetime format
    global d2
    d2 = datetime.datetime(int(year1), int(month1), int(day1))

def test_add_update_delete_resources(self, configuration, post_delete):
    dataset_data = copy.deepcopy(TestDataset.dataset_data)
    resources_data = copy.deepcopy(TestDataset.resources_data)
    dataset = Dataset(configuration, dataset_data)
    dataset.add_update_resources(resources_data)
    assert len(dataset.resources) == 2
    dataset.delete_resource('NOTEXIST')
    assert len(dataset.resources) == 2
    dataset.delete_resource('de6549d8-268b-4dfe-adaf-a4ae5c8510d5')
    assert len(dataset.resources) == 1

def get_url(self):
    Configuration.create(hdx_site='prod', user_agent='A_Quick_Example',
                         hdx_read_only=True)
    dataset = Dataset.read_from_hdx('movement-range-maps')
    resources = dataset.get_resources()
    dic = resources[1]
    self.url = dic['download_url']
    return self

def get_date_from_dataset_date(dataset):
    if isinstance(dataset, str):
        dataset = Dataset.read_from_hdx(dataset)
    date_type = dataset.get_dataset_date_type()
    if date_type == 'range':
        return dataset.get_dataset_end_date(date_format='%Y-%m-%d')
    elif date_type == 'date':
        return dataset.get_dataset_date(date_format='%Y-%m-%d')
    return None

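# Usage sketch for get_date_from_dataset_date: it accepts either a Dataset
# object or a dataset name (the name below is a placeholder) and returns a
# single date, or the end of a date range, formatted as YYYY-MM-DD.
def example_get_date_from_dataset_date():
    print(get_date_from_dataset_date('movement-range-maps'))  # by name
    dataset = Dataset.read_from_hdx('movement-range-maps')
    print(get_date_from_dataset_date(dataset))  # by object
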
def main():
    conf = Configuration()
    countries = {
        'Benin': "BEN",
        # 'Burkina Faso': "BFA",
        # 'Burundi': "BDI",
        # 'Congo': "COG",
        # 'Ivory Coast': "CIV",
        'Ghana': "GHA"
        # 'Guinea': "GIN",
        # 'Guinea-bissau': "GNB",
        # 'Gambia': "GMB",
        # 'Liberia': "LBR",
        # 'Mali': "MLI",
        # 'Mauritania': "MRT",
        # 'Malawi': "MWI",
        # 'Marocco': "MAR",
        # 'Niger': "NER",
        # 'Nigeria': "NGA",
        # 'Senegal': "SEN",
        # 'Sierra Leone': "SLE",
        # 'Togo': "TGO",
        # 'Cameroon': "CMR",
        # 'Central African Republic': "CAR",
        # 'Tanzania': "TZA",
        # 'Rwanda': "RWA",
        # 'Somalia': "SOM",
        # 'South Sudan': "SSD",
        # 'Yemen': "YEM",
        # 'Democratic Republic of The Congo': "COD",
        # 'Uganda': "UGA",
        # 'Zambia': "ZMB",
        # 'Angola': "AGO",
        # 'Kenya': "KEN",
        # 'Ethiopia': "ETH"
        # 'Algeria': "DZA",
        # 'Egypt': "EGY",
        # 'Tunisia': "TUN"
        # 'Haiti': "HTI",
        # 'Libya': "LBY",
        # 'Sudan': "SDN",
        # 'Bangladesh': "BGD",
        # 'Djibouti': "DJI",
        # 'Gabon': "GAB",
        # 'Madagascar': "MDG",
        # 'Lesotho': "LSO",
        # 'Namibia': "NAM",
        # 'Zimbabwe': "ZWE",
        # 'Mozambique': "MOZ",
        # 'Botswana': "BWA"
        # 'Palestine': "PSE",
        # 'Mauritius': "MUS",
        # 'Zambia': "ZMB"
    }
    dataset = Dataset.read_from_hdx('zimbabwe-healthsites')

def sync_datasets(datasets, update_dataset_date=False):
    for dataset in datasets:
        exists = Dataset.read_from_hdx(dataset['name'])
        if exists:
            if update_dataset_date:
                dataset.set_dataset_date_from_datetime(datetime.now())
            dataset.update_in_hdx()
        else:
            dataset.set_dataset_date_from_datetime(datetime.now())
            dataset.create_in_hdx(allow_no_resources=True)

def updateTag(iso2):
    # https://data.humdata.org/dataset/unhabitat-zw-indicators
    iso = iso2.lower()
    url = "unhabitat-%s-indicators" % iso
    print(url)
    dataset = Dataset.read_from_hdx(url)
    # print(dataset)
    dataset.add_tag('INDICATORS')
    dataset.update_in_hdx()

def sync_datasets(self, update_dataset_date=False):  # noqa
    for dataset in self.datasets.values():
        exists = Dataset.read_from_hdx(dataset['name'])
        if exists:
            if update_dataset_date:
                dataset.set_dataset_date_from_datetime(datetime.now())
            dataset.update_in_hdx()
        else:
            dataset.set_dataset_date_from_datetime(datetime.now())
            dataset.create_in_hdx(allow_no_resources=True)

def check_type(dataset, file_types=None):
    # Avoid a mutable default argument
    if file_types is None:
        file_types = []
    temp_dataset = Dataset.read_from_hdx(dataset)
    temp_dataset.separate_resources()
    if len(temp_dataset.resources) > 0:
        if len(file_types) > 0:
            if not set(temp_dataset.get_filetypes()).isdisjoint(file_types):
                return True
        else:
            return True
    return False

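# Usage sketch for check_type (the dataset names are hypothetical): keep only
# the datasets that expose at least one resource in the wanted file types.
def example_check_type():
    candidates = ['movement-range-maps', 'zimbabwe-healthsites']
    wanted = ['csv', 'geojson']
    return [name for name in candidates if check_type(name, wanted)]
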
def test_update_yaml(self, configuration, static_yaml):
    dataset_data = copy.deepcopy(TestDataset.dataset_data)
    dataset = Dataset(configuration, dataset_data)
    assert dataset['name'] == 'MyDataset1'
    assert dataset['author'] == 'AN Other'
    dataset.update_yaml(static_yaml)
    assert dataset['name'] == 'MyDataset1'
    assert dataset['author'] == 'acled'
    assert dataset.get_resources() == [
        {"id": "ABC", "description": "Resource1",
         "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d",
         "name": "Resource1", "url": "http://resource1.xlsx", "format": "xlsx"},
        {"id": "DEF", "description": "Resource2",
         "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d",
         "name": "Resource2", "url": "http://resource2.csv", "format": "csv"}]
    assert dataset.get_gallery() == [
        {'image_url': 'http://docs.hdx.rwlabs.org/wp-content/uploads/acled_visual.png',
         'url': 'http://www.acleddata.com/visuals/maps/dynamic-maps/',
         'type': 'visualization',
         'title': 'Dynamic Map: Political Conflict in Africa',
         'description': 'ACLED maps'}]

def process_mobility(self):
    print("Processing Mobility indices data ...")
    Configuration.create(hdx_site='prod', user_agent='A_Quick_Example',
                         hdx_read_only=True)
    dataset = Dataset.read_from_hdx('movement-range-maps')
    resources = dataset.get_resources()
    dic = resources[1]
    url_mobility = dic['download_url']
    self.file_mobility = "/home/ludo915/code/covsco/data/train/mobility/fr/mvt_range.zip"
    download_url(url_mobility, self.file_mobility)
    with ZipFile(self.file_mobility, 'r') as zipf:
        zipf.printdir()
        print('Extracting mv_range file now...')
        mvt_range = zipf.namelist()[-1]
        zipf.extract(mvt_range, "/home/ludo915/code/covsco/data/train/mobility/fr/")
        print('Done!')
    os.chdir("/home/ludo915/code/covsco/data/train/mobility/fr/")
    os.system("""grep "FRA" """ + mvt_range + """ > mouvement-range-FRA.txt""")
    os.system("""head -n 1 """ + mvt_range + """ > header.txt""")
    os.system("""cat header.txt mouvement-range-FRA.txt > mouvement-range-FRA-final.csv""")
    os.chdir("/home/ludo915/code/covsco/scripts")
    self.df = pd.read_csv(
        "/home/ludo915/code/covsco/data/train/mobility/fr/mouvement-range-FRA-final.csv",
        sep='\t')
    print(self.df)
    self.df["ds"] = pd.to_datetime(self.df["ds"], dayfirst=True)
    # Map region names that arrive with mangled accents back to their correct
    # French spellings
    self.df['polygon_name'] = self.df['polygon_name'].replace({
        'Ile-de-France': 'Île-de-France',
        '-le-de-France': 'Île-de-France',
        "Auvergne-Rh-ne-Alpes": "Auvergne-Rhône-Alpes",
        "Bourgogne-Franche-Comt-": "Bourgogne-Franche-Comté",
        "Provence-Alpes-C-te d'Azur": "Provence-Alpes-Côte d'Azur"})
    self.df2 = pd.read_csv('/home/ludo915/code/covsco/data/train/all_data_merged/fr/Enriched_Covid_history_data.csv')
    self.df2["date"] = pd.to_datetime(self.df2["date"])
    self.df3 = pd.read_csv("/home/ludo915/code/covsco/data/train/pop/fr/regions_departements.csv", sep=";")
    self.df.reset_index(inplace=True)
    self.df2.reset_index(inplace=True)
    self.df3.reset_index(inplace=True)
    self.df.drop(columns=["index"], inplace=True)
    self.df2.drop(columns=["index"], inplace=True)
    self.df3.drop(columns=["index"], inplace=True)
    self.df2 = self.df2.merge(self.df3, how='inner', left_on="numero",
                              right_on="depnum", suffixes=("", "_y"))
    self.df2 = self.df2.merge(self.df, how="outer", left_on=["Region", "date"],
                              right_on=["polygon_name", "ds"],
                              suffixes=("", "_y")).dropna()
    print(self.df2)
    self.df2.to_csv("/home/ludo915/code/covsco/data/train/all_data_merged/fr/Enriched_Covid_history_data.csv", index=False)
    print('OK')
    return None

def get_resources(url_end, csv_filename, docname, keyword):
    Configuration.create(hdx_site='prod', user_agent='A_Quick_Example',
                         hdx_read_only=True)
    # Gets web url
    dataset = Dataset.read_from_hdx(url_end)
    # Writes Dataset Date in dependencydate csv
    with open('/Users/katherinenewcomb/Desktop/TestingRepo/{}'.format(csv_filename),
              "w+") as f:
        f.write(dataset.get_dataset_date())
    # Searches for specific file on web url
    datasets = Dataset.search_in_hdx(docname, rows=10)
    # Grabs resources from file
    global resources
    resources = Dataset.get_all_resources(datasets)
    # Comment out the two lines below if you do not want to download the file
    url, path = resources[0].download(
        '/Users/katherinenewcomb/Desktop/TestingRepo')
    print('Resource URL %s downloaded to %s' % (url, path))

def sync_datasets(self):  # noqa
    for dataset in self.datasets.values():
        try:
            exists = Dataset.read_from_hdx(dataset['name'])
            if exists:
                dataset.update_in_hdx()
            else:
                dataset.create_in_hdx()
        except Exception as e:
            client.captureException()
            LOG.warn(e)
            LOG.warn(traceback.format_exc())

def generate_dataset(configuration):
    url = configuration['base_url'] + configuration['api']
    loaData.writeData(url)
    name = 'Africa health facilities'
    title = 'Africa health facilities data'
    slugified_name = slugify(name).lower()
    dataset = Dataset(configuration, {})
    dataset['name'] = slugified_name
    dataset['title'] = title
    # HDX expects dataset_date as MM/DD/YYYY
    date = time.strftime("%m/%d/%Y")
    dataset['dataset_date'] = date
    dataset.add_continent_location('AF')
    rName = "sen-healthfacilities"
    resource = Resource()
    resource['name'] = rName
    resource['format'] = 'geojson'
    resource['url'] = url
    resource['description'] = configuration['base_url']
    resource['url_type'] = 'api'
    resource['resource_type'] = 'api'
    resource.set_file_to_upload(configuration['data_folder'] + 'sen-healthfacilities.geojson')
    dataset.add_update_resource(resource)
    return dataset

def test_create_in_hdx(self, configuration, post_create):
    dataset = Dataset(configuration)
    with pytest.raises(HDXError):
        dataset.create_in_hdx()
    dataset['id'] = 'TEST1'
    dataset['name'] = 'LALA'
    with pytest.raises(HDXError):
        dataset.create_in_hdx()
    dataset_data = copy.deepcopy(TestDataset.dataset_data)
    dataset = Dataset(configuration, dataset_data)
    dataset.create_in_hdx()
    assert dataset['id'] == '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'
    assert len(dataset.resources) == 2
    assert len(dataset.gallery) == 0
    dataset_data['name'] = 'MyDataset2'
    dataset = Dataset(configuration, dataset_data)
    with pytest.raises(HDXError):
        dataset.create_in_hdx()
    dataset_data['name'] = 'MyDataset3'
    dataset = Dataset(configuration, dataset_data)
    with pytest.raises(HDXError):
        dataset.create_in_hdx()
    dataset_data = copy.deepcopy(TestDataset.dataset_data)
    gallery_data = copy.deepcopy(TestDataset.gallery_data)
    dataset_data["gallery"] = gallery_data
    with pytest.raises(HDXError):
        dataset = Dataset(configuration, dataset_data)
    del dataset_data["gallery"]
    dataset = Dataset(configuration, dataset_data)
    dataset.add_update_gallery(gallery_data)
    dataset.create_in_hdx()
    assert dataset['id'] == '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'
    assert len(dataset.resources) == 2
    assert len(dataset.gallery) == 1

def generate_dataset_and_showcase(acled_url, hxlproxy_url, downloader, countrydata):
    """
    Create HXLated URLs to ACLED API eg.
    https://data.humdata.org/hxlproxy/data.csv?name=ACLEDHXL&url=https%3A//api.acleddata.com/acled/read.csv%3Flimit%3D0%26iso%3D120&tagger-match-all=on&tagger-02-header=iso&tagger-02-tag=%23country%2Bcode&tagger-03-header=event_id_cnty&tagger-03-tag=%23event%2Bcode&tagger-05-header=event_date&tagger-05-tag=%23date%2Boccurred+&tagger-08-header=event_type&tagger-08-tag=%23event%2Btype&tagger-09-header=actor1&tagger-09-tag=%23group%2Bname%2Bfirst&tagger-10-header=assoc_actor_1&tagger-10-tag=%23group%2Bname%2Bfirst%2Bassoc&tagger-12-header=actor2&tagger-12-tag=%23group%2Bname%2Bsecond&tagger-13-header=assoc_actor_2&tagger-13-tag=%23group%2Bname%2Bsecond%2Bassoc&tagger-16-header=region&tagger-16-tag=%23region%2Bname&tagger-17-header=country&tagger-17-tag=%23country%2Bname&tagger-18-header=admin1&tagger-18-tag=%23adm1%2Bname&tagger-19-header=admin2&tagger-19-tag=%23adm2%2Bname&tagger-20-header=admin3&tagger-20-tag=%23adm3%2Bname&tagger-21-header=location&tagger-21-tag=%23loc%2Bname&tagger-22-header=latitude&tagger-22-tag=%23geo%2Blat&tagger-23-header=longitude&tagger-23-tag=%23geo%2Blon&tagger-25-header=source&tagger-25-tag=%23meta%2Bsource&tagger-27-header=notes&tagger-27-tag=%23description&tagger-28-header=fatalities&tagger-28-tag=%23affected%2Bkilled&header-row=1
    """
    countryname = countrydata['countryname']
    title = '%s - Conflict Data' % countryname
    logger.info('Creating dataset: %s' % title)
    slugified_name = slugify('ACLED Data for %s' % countryname).lower()
    countryiso = countrydata['iso3']
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('8b84230c-e04a-43ec-99e5-41307a203a2f')
    dataset.set_organization('b67e6c74-c185-4f43-b561-0e114a736f19')
    dataset.set_expected_update_frequency('Live')
    dataset.set_subnational(True)
    dataset.add_country_location(countryiso)
    tags = ['HXL', 'conflicts', 'political violence', 'protests']
    dataset.add_tags(tags)
    acled_country_url = '%siso=%d' % (acled_url, countrydata['m49'])
    # hxlate is a module-level constant holding the tagger query-string suffix
    # shown in the docstring above
    url = '%surl=%s%s' % (hxlproxy_url, quote_plus(acled_country_url), hxlate)
    earliest_year = 10000
    latest_year = 0
    for row in downloader.get_tabular_rows(acled_country_url, dict_rows=True, headers=1):
        year = int(row['year'])
        if year < earliest_year:
            earliest_year = year
        if year > latest_year:
            latest_year = year
    if latest_year == 0:
        logger.warning('%s has no data!' % countryname)
        return None, None
    resource = {
        'name': 'Conflict Data for %s' % countryname,
        'description': 'Conflict data with HXL tags',
        'format': 'csv',
        'url': url
    }
    dataset.add_update_resource(resource)
    dataset.set_dataset_year_range(earliest_year, latest_year)
    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Dashboard for %s' % countrydata['countryname'],
        'notes': 'Conflict Data Dashboard for %s' % countrydata['countryname'],
        'url': 'https://www.acleddata.com/dashboard/#%03d' % countrydata['m49'],
        'image_url': 'https://www.acleddata.com/wp-content/uploads/2018/01/dash.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase

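# A hedged usage sketch for generate_dataset_and_showcase: the URLs are
# illustrative placeholders modelled on the docstring, and Download comes from
# hdx.utilities.downloader. Cameroon's UN M49 code (120) matches the iso=120
# example in the docstring above.
def example_generate_dataset_and_showcase():
    from hdx.utilities.downloader import Download
    with Download() as downloader:
        return generate_dataset_and_showcase(
            'https://api.acleddata.com/acled/read.csv?limit=0&',          # acled_url
            'https://data.humdata.org/hxlproxy/data.csv?name=ACLEDHXL&',  # hxlproxy_url
            downloader,
            {'countryname': 'Cameroon', 'iso3': 'CMR', 'm49': 120})
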
def test_update_in_hdx(self, configuration, post_update):
    dataset = Dataset(configuration)
    dataset['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        dataset.update_in_hdx()
    dataset['name'] = 'LALA'
    with pytest.raises(HDXError):
        dataset.update_in_hdx()
    dataset = Dataset.read_from_hdx(configuration, 'TEST1')
    assert dataset['id'] == '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'
    assert dataset['dataset_date'] == '06/04/2016'
    dataset['dataset_date'] = '02/26/2016'
    dataset['id'] = 'TEST1'
    dataset['name'] = 'MyDataset1'
    dataset.update_in_hdx()
    assert dataset['id'] == 'TEST1'
    assert dataset['dataset_date'] == '02/26/2016'
    dataset['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        dataset.update_in_hdx()
    del dataset['id']
    with pytest.raises(HDXError):
        dataset.update_in_hdx()
    dataset_data = copy.deepcopy(TestDataset.dataset_data)
    gallery_data = copy.deepcopy(TestDataset.gallery_data)
    dataset_data['name'] = 'MyDataset1'
    dataset_data['id'] = 'TEST1'
    dataset = Dataset(configuration, dataset_data)
    dataset.add_update_gallery(gallery_data)
    dataset.create_in_hdx()
    assert dataset['id'] == 'TEST1'
    assert dataset['dataset_date'] == '03/23/2016'
    assert len(dataset.resources) == 2
    assert len(dataset.gallery) == 1
    dataset.update_in_hdx()
    assert len(dataset.resources) == 2
    assert len(dataset.gallery) == 1

def test_delete_from_hdx(self, configuration, post_delete):
    dataset = Dataset.read_from_hdx(configuration, 'TEST1')
    dataset.delete_from_hdx()
    del dataset['id']
    with pytest.raises(HDXError):
        dataset.delete_from_hdx()