def calculate_regional_average(cls, val_type, datadict, iso3):
    """Return a regional (or, failing that, global) average for a country.

    Region levels 3, 2 and 1 of ``cls.region_levels`` are tried in that
    order. For each level the country's region code is read from its country
    info record; a level with an empty code is skipped. The first level for
    which ``cls.calculate_average`` gives a truthy result is used. If no
    level qualifies, the average over all of ``datadict`` is returned.

    Args:
        val_type: Label of the value being averaged; used only in log output.
        datadict: Data handed to ``cls.calculate_average``.
        iso3: ISO3 code of the country that needs a substitute value.

    Returns:
        Tuple ``(average, region code)``. The region code is an ``int`` for a
        regional average and the string ``'001'`` for the global fallback.
    """
    countryinfo = Country.get_country_info_from_iso3(iso3)
    for level in (3, 2, 1):
        region_level = cls.region_levels[level]

        code_column = Column.parse('#region+code+%s' % region_level)
        code_tag = code_column.get_display_tag(sort_attributes=True)
        regioncode = countryinfo[code_tag]
        if not regioncode:
            # The country has no region at this level; try the next one.
            continue
        regioncode = int(regioncode)

        name_column = Column.parse(
            '#region+%s+name+preferred' % region_level)
        name_tag = name_column.get_display_tag(sort_attributes=True)
        regionname = countryinfo[name_tag]

        members = Country.get_countries_in_region(regioncode)
        avg = cls.calculate_average(datadict, members)
        if not avg:
            # A falsy average (including 0) is treated as "no usable data".
            continue

        logger.warning('%s: %s - Using %s (%s) average' %
                       (iso3, val_type, regionname, region_level))
        return avg, regioncode

    logger.warning('%s: %s - Using global average' % (iso3, val_type))
    return cls.calculate_average(datadict), '001'
def test_get_country_info_from_iso3(self):
    """Check the complete info record returned for the lower-case code 'bih'.

    The lookup is made with ``use_live=False``.
    """
    actual = Country.get_country_info_from_iso3('bih', use_live=False)

    # NOTE(review): the English UN-term name below is 'Bonaire, Saint
    # Eustatius and Saba' while every other language variant names Bosnia
    # and Herzegovina. Presumably this mirrors the data being read - confirm
    # before changing it.
    expected = {
        '#country+alt+i_ar+name+v_unterm': 'البوسنة والهرسك',
        '#country+alt+i_en+name+v_unterm': 'Bonaire, Saint Eustatius and Saba',
        '#country+alt+i_es+name+v_unterm': 'Bosnia y Herzegovina',
        '#country+alt+i_fr+name+v_unterm': 'Bosnie-Herzégovine (la)',
        '#country+alt+i_ru+name+v_unterm': 'Босния и Герцеговина',
        '#country+alt+i_zh+name+v_unterm': '波斯尼亚和黑塞哥维那',
        '#country+alt+name+v_fts': '',
        '#country+alt+name+v_hrinfo_country': '',
        '#country+alt+name+v_iso': '',
        '#country+alt+name+v_m49': '',
        '#country+alt+name+v_reliefweb': '',
        '#country+alt+name+v_unterm': '',
        '#country+code+num+v_m49': '70',
        '#country+code+v_fts': '28',
        '#country+code+v_hrinfo_country': '208',
        '#country+code+v_iso2': 'BA',
        '#country+code+v_iso3': 'BIH',
        '#country+code+v_reliefweb': '40',
        '#country+name+preferred': 'Bosnia and Herzegovina',
        '#country+name+short+v_reliefweb': '',
        '#country+regex': 'herzegovina|bosnia',
        '#geo+admin_level': '0',
        '#geo+lat': '44.16506495',
        '#geo+lon': '17.79105724',
        '#meta+id': '28',
        '#region+code+intermediate': '',
        '#region+code+main': '150',
        '#region+code+sub': '39',
        '#region+intermediate+name+preferred': '',
        '#region+main+name+preferred': 'Europe',
        '#region+name+preferred+sub': 'Southern Europe',
    }
    assert actual == expected
def get_iso2(s, use_live=False):
    """Resolve ``s`` to an ISO2 country code.

    Args:
        s: Country name or code to resolve.
        use_live: Forwarded to ``Country.get_iso3_country_code_fuzzy``.

    Returns:
        ``s`` upper-cased when ``Country`` is falsy (presumably when the
        lookup library is unavailable - verify where ``Country`` is bound);
        otherwise the ``#country+code+v_iso2`` field of the fuzzily matched
        country, or ``None`` when the fuzzy lookup finds no ISO3 code.
    """
    if not Country:
        return s.upper()

    # Element 1 of the lookup result is not needed here.
    iso3, _ = Country.get_iso3_country_code_fuzzy(s, use_live)
    if not iso3:
        return None

    country_info = Country.get_country_info_from_iso3(iso3)
    return country_info["#country+code+v_iso2"]
def get_location(_, sa=False):
    """Map an ISO3 code to a location label.

    Args:
        _: ISO3 code; a null value (per ``pd.isnull``) yields ``None``.
        sa: When false, the ``iso32loc`` entry is returned as is. When true,
            only entries containing 'Europe' or 'Asia' are kept and every
            other country is reported by its 'Country or Area' field.

    Returns:
        ``None``, ``'Asia'`` (for any ``iso32loc`` entry containing
        'Indian'), the ``iso32loc`` entry, or the country's
        'Country or Area' value.
    """
    if pd.isnull(_):
        return None

    region = iso32loc[_]
    if 'Indian' in region:
        return 'Asia'

    keep_region = not sa or 'Europe' in region or 'Asia' in region
    if keep_region:
        return region

    country_info = Country.get_country_info_from_iso3(_)
    return country_info['Country or Area']
def test_get_country_info_from_iso3(self):
    """Check the complete info records returned for 'bih' and 'PSE'.

    The first lookup uses a lower-case code, the second an upper-case one.
    """
    bih_actual = Country.get_country_info_from_iso3('bih')
    bih_expected = {
        '#country+alt+i_ar+name+v_unterm': 'البوسنة والهرسك',
        '#country+alt+i_en+name+v_unterm': 'Bosnia and Herzegovina',
        '#country+alt+i_es+name+v_unterm': 'Bosnia y Herzegovina',
        '#country+alt+i_fr+name+v_unterm': 'Bosnie-Herzégovine (la)',
        '#country+alt+i_ru+name+v_unterm': 'Босния и Герцеговина',
        '#country+alt+i_zh+name+v_unterm': '波斯尼亚和黑塞哥维那',
        '#country+alt+name+v_fts': '',
        '#country+alt+name+v_hrinfo_country': '',
        '#country+alt+name+v_iso': '',
        '#country+alt+name+v_m49': '',
        '#country+alt+name+v_reliefweb': '',
        '#country+alt+name+v_unterm': '',
        '#country+code+num+v_m49': '70',
        '#country+code+v_fts': '28',
        '#country+code+v_hrinfo_country': '208',
        '#country+code+v_iso2': 'BA',
        '#country+code+v_iso3': 'BIH',
        '#country+code+v_reliefweb': '40',
        '#country+name+preferred': 'Bosnia and Herzegovina',
        '#country+name+short+v_reliefweb': '',
        '#country+regex': 'herzegovina|bosnia',
        '#geo+admin_level': '0',
        '#geo+lat': '44.16506495',
        '#geo+lon': '17.79105724',
        '#meta+id': '28',
        '#region+code+intermediate': '',
        '#region+code+main': '150',
        '#region+code+sub': '39',
        '#region+intermediate+name+preferred': '',
        '#region+main+name+preferred': 'Europe',
        '#region+name+preferred+sub': 'Southern Europe',
    }
    assert bih_actual == bih_expected

    pse_actual = Country.get_country_info_from_iso3('PSE')
    pse_expected = {
        '#meta+id': '170',
        '#country+code+v_hrinfo_country': '351',
        '#country+code+v_reliefweb': '180',
        '#country+code+num+v_m49': '275',
        '#country+code+v_fts': '171',
        '#country+code+v_iso2': 'PS',
        '#country+code+v_iso3': 'PSE',
        '#country+name+preferred': 'State of Palestine',
        '#country+alt+name+v_m49': '',
        '#country+alt+name+v_iso': 'Palestine, State of',
        '#country+alt+name+v_unterm': '',
        '#country+alt+name+v_fts': 'occupied Palestinian territory',
        '#country+alt+name+v_hrinfo_country': 'occupied Palestinian territory',
        '#country+name+short+v_reliefweb': 'oPt',
        '#country+alt+name+v_reliefweb': 'occupied Palestinian territory',
        '#country+alt+i_en+name+v_unterm': 'Palestine',
        '#country+alt+i_fr+name+v_unterm': 'État de Palestine',
        '#country+alt+i_es+name+v_unterm': 'Estado de Palestina',
        '#country+alt+i_ru+name+v_unterm': 'Государство Палестина',
        '#country+alt+i_zh+name+v_unterm': '巴勒斯坦国',
        '#country+alt+i_ar+name+v_unterm': 'دولة فلسطين',
        '#geo+admin_level': '0',
        '#geo+lat': '31.99084142',
        '#geo+lon': '35.30744047',
        '#region+code+main': '142',
        '#region+main+name+preferred': 'Asia',
        '#region+code+sub': '145',
        '#region+name+preferred+sub': 'Western Asia',
        '#region+code+intermediate': '',
        '#region+intermediate+name+preferred': '',
        '#country+regex': 'palestin|\\bgaza|west.?bank',
        '#country+name+override': 'oPt',
    }
    assert pse_actual == pse_expected
# Read the tab-separated table at params.drm_tab (first column as index) and
# outer-join the tab-separated table at params.input_data onto it.
df = pd.read_csv(params.drm_tab, index_col=0, header=0, sep='\t')
df = df.join(pd.read_csv(params.input_data, index_col=0, header=0, sep='\t'), how='outer')

# Add Location info
countries = df['Country Code'].unique()
# Map each non-null country code to element 0 of the fuzzy ISO3 lookup result.
country2iso3 = {
    _: Country.get_iso3_country_code_fuzzy(_)[0]
    for _ in countries if not pd.isnull(_)
}
# Country info record per resolved ISO3 code.
# NOTE(review): every value of country2iso3 is passed on here, including any
# for which the fuzzy lookup found nothing - confirm that case is handled.
iso32info = {
    _: Country.get_country_info_from_iso3(_)
    for _ in country2iso3.values()
}
# Derived columns; rows whose code or ISO3 is not in the lookup get None.
df['Country ISO3'] = df['Country Code'].apply(
    lambda _: country2iso3[_] if _ in country2iso3 else None)
df['Continent'] = df['Country ISO3'].apply(
    lambda _: iso32info[_]['Region Name'] if _ in iso32info else None)
df['Country'] = df['Country ISO3'].apply(
    lambda _: iso32info[_]['Country or Area'] if _ in iso32info else None)

# NOTE(review): the definition below is cut off after its last `if` line;
# the body of that branch is missing from this file.
def get_location(_, sa=False):
    if pd.isnull(_):
        return None
    loc = iso32loc[_]
    if 'Indian' in loc:
def generate_country_dataset_and_showcase(downloader, folder, headersdata,
                                          countryiso, countrydata,
                                          indicator_datasets, tags):
    """Build the per-country IDMC dataset, its resources and its showcase.

    Args:
        downloader: Object whose ``setup(url)`` is used to probe the IDMC
            country summary page.
        folder: Folder handed to ``dataset.generate_resource_from_rows``.
        headersdata: Mapping of endpoint to a ``(headers, hxltags)`` pair.
        countryiso: ISO3 code of the country.
        countrydata: Mapping of endpoint to an iterable of row mappings
            keyed by HXL tag.
        indicator_datasets: Mapping of endpoint to the indicator dataset
            that supplies title, notes, methodology, caveats and the
            resource description.
        tags: Tags applied to both the dataset and the showcase.

    Returns:
        Tuple ``(dataset, showcase, bites_disabled)``. All three are ``None``
        when adding the country location raises ``HDXError``. ``showcase``
        is ``None`` when neither candidate summary-page URL can be set up.
        ``bites_disabled`` holds three flags, each cleared once any row has
        a truthy value for the matching indicator tag.
    """
    source_datasets = indicator_datasets.values()
    title = extract_list_from_list_of_dict(source_datasets, 'title')
    countryname = Country.get_country_name_from_iso3(countryiso)
    dataset = get_dataset('%s - %s' % (countryname, title[0]), tags,
                          'IDMC IDP data for %s' % countryname)
    try:
        dataset.add_country_location(countryiso)
    except HDXError as e:
        logger.exception('%s has a problem! %s' % (countryname, e))
        return None, None, None

    # Merge the descriptive metadata of all indicator datasets.
    description = extract_list_from_list_of_dict(source_datasets, 'notes')
    dataset['notes'] = get_matching_then_nonmatching_text(
        description, separator='\n\n', ignore='\n')
    methodology = extract_list_from_list_of_dict(source_datasets,
                                                 'methodology_other')
    dataset['methodology_other'] = get_matching_then_nonmatching_text(
        methodology)
    caveats = extract_list_from_list_of_dict(source_datasets, 'caveats')
    dataset['caveats'] = get_matching_then_nonmatching_text(caveats)

    # Position i of bites_disabled corresponds to bite_tags[i].
    bite_tags = (
        '#affected+idps+ind+stock+conflict',
        '#affected+idps+ind+newdisp+conflict',
        '#affected+idps+ind+newdisp+disaster',
    )
    bites_disabled = [True, True, True]
    years = set()

    for endpoint in countrydata:
        data = countrydata[endpoint]
        headers, hxltags = headersdata[endpoint]
        rows = [headers, hxltags]
        for row in data:
            # Project the row onto the endpoint's HXL tag order.
            rows.append([row.get(hxltag) for hxltag in hxltags])
            year = row.get('#date+year')
            for position, bite_tag in enumerate(bite_tags):
                if row.get(bite_tag):
                    bites_disabled[position] = False
            if year is not None:
                years.add(year)

        name = indicator_datasets[endpoint].get_resources()[0]['description']
        resourcedata = {
            'name': endpoint,
            'description': '%s for %s' % (name, countryname)
        }
        filename = '%s_%s.csv' % (endpoint, countryname)
        dataset.generate_resource_from_rows(folder, filename, rows,
                                            resourcedata)

    # NOTE(review): raises IndexError if no row supplied a year.
    years = sorted(years)
    dataset.set_dataset_year_range(years[0], years[-1])

    # Try the hyphenated country name first, then the English UN-term name.
    url_template = 'http://www.internal-displacement.org/countries/%s/'
    url = url_template % countryname.replace(' ', '-')
    try:
        downloader.setup(url)
    except DownloadError:
        country_info = Country.get_country_info_from_iso3(countryiso)
        altname = country_info['#country+alt+i_en+name+v_unterm']
        url = url_template % altname
        try:
            downloader.setup(url)
        except DownloadError:
            return dataset, None, bites_disabled

    showcase = Showcase({
        'name': '%s-showcase' % dataset['name'],
        'title': 'IDMC %s Summary Page' % countryname,
        'notes': 'Click the image on the right to go to the IDMC summary page for the %s dataset' % countryname,
        'url': url,
        'image_url': 'http://www.internal-displacement.org/sites/default/files/logo_0.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase, bites_disabled