def update_caches():
    '''Refresh the module-level metadata caches for the current language.

    This needs to be called prior to any fetch from an economy endpoint.
    It returns immediately when `_localized_metadata` already holds data for
    `w.lang`. Otherwise it caches the translated region, income level and
    lending type names plus capital city names, and (the first time only,
    i.e. while `_class_data` is not yet a dict) the per-economy
    classification data, the iso2 codes and the set of aggregates.
    '''
    global _localized_metadata, _iso2Codes, _class_data, _aggs

    if _localized_metadata.get(w.lang):
        return  # this language is already cached

    def cache_capital(row):
        # capital city names live in the same per-language dict, under a prefixed key
        _localized_metadata[w.lang]['capitalCity:' + row['id']] = (
            row['capitalCity'].strip() or _empty_meta_value)

    # translated names for everything except capital cities
    labels = {}
    for endpoint in ('region', 'incomelevel', 'lendingtype'):
        for row in w.fetch(endpoint):
            if 'name' in row:
                labels[row['code']] = row['name'].strip()
            else:
                labels[row['id']] = row['value'].strip()

            _iso2Codes[row['id']] = row['iso2code']

    _localized_metadata[w.lang] = labels

    url = 'country/all'
    if type(_class_data) is dict:
        # classification data was loaded by an earlier call: only the
        # capital city translations need refreshing
        for row in w.fetch(url):
            cache_capital(row)
        return

    # first call: initialize objects, then update codes and city
    # translations in a single pass
    _class_data = {}
    _aggs = set()
    for row in w.fetch(url):
        economy = row['id']
        _iso2Codes[economy] = row['iso2Code']
        cache_capital(row)

        is_aggregate = row['region']['id'] == 'NA'
        info = {'aggregate': is_aggregate}
        for key in ('longitude', 'latitude'):
            info[key] = float(row[key]) if len(row[key]) else None

        for key in ('region', 'adminregion', 'lendingType', 'incomeLevel'):
            if is_aggregate:
                info[key] = _empty_meta_value
            else:
                info[key] = row[key]['id'] or _empty_meta_value

        _class_data[economy] = info
        if is_aggregate:
            _aggs.add(economy)
            _aggs.add(row['iso2Code'])

    # add one dummy that we can match to unrecognized economy codes
    template = _class_data['USA']
    _class_data['___'] = {key: None for key in template.keys()}
def list(id='all', q=None, group=None):
    '''Iterate over regions, optionally filtered by name and subgroup

    Arguments:
        id:     a region identifier or list-like of identifiers

        q:      search string (on region name)

        group:  subgroup to return. Can be one of: 'admin', 'geo', 'allincomelevels', 'demodividend', 'smallstates', 'other'
                NB: technically possible to pass 'lending' but the returned values generally aren't useful

    Returns:
        a generator object

    Example:
        regions = {row['code']: row['name'] for row in wbgapi.region.list()}

    Notes:
        The region list is global to the entire API and is not specific to the current database.
    '''
    params = {}
    if group:
        params['type'] = group

    q, _ = utils.qget(q)
    endpoint = 'region/' + w.queryParam(id)
    yield from (row for row in w.fetch(endpoint, params)
                if utils.qmatch(q, row['name']))
def normalize(i):
    '''Expand indicator specifications into fully populated dicts.

    Arguments:
        i:  an iterable whose elements are either indicator identifiers
            (strings) or dicts with an 'id' key and, optionally, 'name',
            'source', 'multiplier' and 'precision' keys. None is treated
            as an empty iterable.

    Returns:
        a list of new dicts, one per element, each with the keys 'id',
        'name', 'source', 'multiplier' and 'precision'. Missing keys are
        filled in: 'name' and 'precision' from the API, 'source' with 2
        (WDI) and 'multiplier' with 1. The caller's dicts are not modified.

    Raises:
        IndexError: if the API returns no rows for an indicator whose
            name or precision has to be looked up.
    '''
    if i is None:
        return []

    ilist = []
    for elem in i:
        # always work on a fresh dict so the caller's objects are left untouched
        spec = {'id': elem} if isinstance(elem, str) else dict(elem)

        attrs = None
        if 'name' not in spec or 'precision' not in spec:
            # this is an atypical way to fetch attributes but only the
            # old-style endpoints give us precision. Skipped entirely when
            # the caller already supplied both values.
            attrs = list(wb.fetch(
                'country/{}/indicator/{}'.format('USA', spec['id']),
                {'mrv': 1}))[0]

        if 'name' not in spec:
            spec['name'] = attrs['indicator']['value']

        if 'source' not in spec:
            spec['source'] = 2  # default to WDI

        if 'multiplier' not in spec:
            spec['multiplier'] = 1

        if 'precision' not in spec:
            spec['precision'] = attrs['decimal']

        ilist.append(spec)

    return ilist
def coder_report(economies, full=True):
    '''Build a table comparing caller-supplied names with WBG names.

    Arguments:
        economies:  a dict whose keys are the original names and whose
                    values are the matched economy codes (or a falsy value
                    when no code was found)

        full:       if False, leave out rows whose original name equals the
                    WBG name (case-insensitive)

    Returns:
        a list of 3-element lists [original name, WBG name, code]. The
        first row is the header. The WBG name is '' when the code is falsy
        or is not a non-aggregate economy returned by the API.
    '''
    # English names of all non-aggregate economies, keyed by code
    wbg_names = {
        row['id']: row['name']
        for row in w.fetch('country/all', lang='en')
        if row['region']['id'] != 'NA'
    }

    output = [['ORIGINAL NAME', 'WBG NAME', 'ISO_CODE']]
    for name, code in economies.items():
        wb_name = wbg_names.get(code, '') if code else ''
        if code and not full and wb_name.lower() == name.lower():
            continue  # summary mode: exact matches are not interesting

        output.append([name, wb_name, code])

    return output
def list(id='all', q=None):
    '''Iterate over databases, optionally filtered by name

    Arguments:
        id:     a database identifier or list-like

        q:      search string (on source name)

    Returns:
        a generator object

    Example:
        for elem in wbgapi.source.list():
            print(elem['id'], elem['name'], elem['lastupdated'])
    '''
    id = w.queryParam(id)
    if not id:
        # nothing to query: the generator simply ends without yielding
        return []

    q, _ = utils.qget(q)
    matches = (row for row in w.fetch(_sourceurl(id), {'databid': 'y'})
               if utils.qmatch(q, row['name']))
    yield from matches
def list(id='all'):
    '''Iterate over lending groups

    Arguments:
        id:     a lending group identifier or list-like of identifiers

    Returns:
        a generator object

    Example:
        lendingGroups = {row['id']: row['value'] for row in wbgapi.lending.list()}
    '''
    endpoint = 'lendingtype/' + w.queryParam(id)
    yield from w.fetch(endpoint)
def concepts(db=None):
    '''Retrieve the concepts for the specified database. Alternate dimension
    names for the 3 primary dimensions are mapped to 'economy', 'time' and
    'series'. Results are cached per database in `_concepts`.

    Arguments:
        db:     the database ID (e.g., 2=WDI). Default to the global database

    Returns:
        a dictionary of concepts: keys are URL friendly

    Example:
        for k,v in wbgapi.source.concepts(2).items():
            print(k, v['key'], v['value'])
    '''
    global _concepts

    db = int(w.db if db is None else db)

    cached = _concepts.get(db)
    if cached is not None:
        return cached

    # there's currently an extra space at the end of "receiving countries" -
    # we support a trimmed version in the event this gets quietly fixed someday
    aliases = {
        'country': 'economy',
        'admin%20region': 'economy',
        'states': 'economy',
        'provinces': 'economy',
        'receiving%20countries%20': 'economy',
        'receiving%20countries': 'economy',
        'year': 'time',
        'indicator': 'series',
    }

    result = {}
    for row in w.fetch('sources/{}/concepts'.format(db), concepts=True):
        key = urllib.parse.quote(row['id']).lower()
        name = aliases.get(key)
        if name is None:
            # not a primary dimension: keep the key, neutralizing special characters
            name = re.sub(r'[\-\.,:!]', '_', key)

        result[name] = {'key': key, 'value': row['value']}

    _concepts[db] = result
    return result
def members(id):
    '''Collect the series identifiers that belong to the specified topic

    Arguments:
        id:     a topic identifier

    Returns:
        a set object of series identifiers
    '''
    endpoint = 'topic/{}/indicator'.format(w.queryParam(id))
    return {row['id'] for row in w.fetch(endpoint, {'source': w.db})}
def list(id='all'):
    '''Fetch the list of databases

    Arguments:
        id:     a database identifier or list-like

    Returns:
        the result of `w.fetch` for the source endpoint (documented as a
        generator object), or an empty list if `id` resolves to nothing

    Example:
        for elem in wbgapi.source.list():
            print(elem['id'], elem['name'], elem['lastupdated'])
    '''
    id = w.queryParam(id)
    if id:
        return w.fetch(_sourceurl(id), {'databid': 'y'})

    return []
def list(id='all', group=None):
    '''Iterate over regions, optionally restricted to a subgroup

    Arguments:
        id:     a region identifier or list-like of identifiers

        group:  subgroup to return. Can be one of: 'admin', 'geo', 'allincomelevels', 'demodividend', 'smallstates', 'other'
                NB: technically possible to pass 'lending' but the returned values generally aren't useful

    Returns:
        a generator object

    Example:
        regions = {row['code']: row['name'] for row in wbgapi.region.list()}
    '''
    params = {}
    if group:
        params['type'] = group

    yield from w.fetch('region/' + w.queryParam(id), params)
def members(id, param='region'):
    '''Collect the economy identifiers that belong to the specified region

    Arguments:
        id:     a region identifier

        param:  used internally (the query parameter name sent to the
                'country' endpoint)

    Returns:
        a set object of economy identifiers

    Notes:
        the returned members may not match the economies in the current database since we access the universal region lists from the API
    '''
    query = {param: id}
    return {row['id'] for row in w.fetch('country', query)}
def concepts(db=None):
    '''Retrieve the concepts for the specified database. Results are cached
    per database in `_concepts`.

    Arguments:
        db:     the database ID (e.g., 2=WDI). Default to the global database

    Returns:
        a dictionary of concepts: keys are URL friendly

    Example:
        for k,v in wbgapi.source.concepts(2).items():
            print(k, v['key'], v['value'])
    '''
    global _concepts

    db = int(w.db if db is None else db)

    cached = _concepts.get(db)
    if cached is not None:
        return cached

    # alternate names under which the economy and time dimensions can appear
    economy_keys = ('country', 'economy', 'admin%20region', 'states', 'provinces')
    time_keys = ('time', 'year')

    result = {}
    for row in w.fetch('sources/{}/concepts'.format(db), concepts=True):
        key = urllib.parse.quote(row['id']).lower()
        if key in economy_keys:
            name = 'economy'
        elif key in time_keys:
            name = 'time'
        else:
            name = key

        result[name] = {'key': key, 'value': row['value']}

    _concepts[db] = result
    return result
def list(id='all', q=None):
    '''Iterate over lending groups, optionally filtered by name

    Arguments:
        id:     a lending group identifier or list-like of identifiers

        q:      search string (on lending group name)

    Returns:
        a generator object

    Example:
        lendingGroups = {row['id']: row['value'] for row in wbgapi.lending.list()}

    Notes:
        The lending group list is global to the entire API and is not specific to the current database.
    '''
    q, _ = utils.qget(q)
    endpoint = 'lendingtype/' + w.queryParam(id)
    yield from (row for row in w.fetch(endpoint)
                if utils.qmatch(q, row['value']))
def list(id='all', q=None):
    '''Iterate over topics, optionally filtered by name

    Arguments:
        id:     a topic identifier or list-like of identifiers

        q:      search string (on topic name)

    Returns:
        a generator object

    Example:
        topics = {row['value']: row['id'] for row in wbgapi.topic.list()}

    Notes:
        The topic list is global to the entire API and is not specific to the current database.
    '''
    q, _ = utils.qget(q)
    endpoint = 'topic/' + w.queryParam(id)
    yield from (row for row in w.fetch(endpoint)
                if utils.qmatch(q, row['value']))
archiveDBs = [i for i in config['--archived-dbs'].split(',')] yearKeys = [int(i) for i in config['--since'].split(',')] _yearBreaks = {} for i in yearKeys: _yearBreaks[i] = 0 # sanity checks if len(config['INDICATOR']) > 1: config['--verbose'] = False # get populations _pops = {} for row in wbgapi.fetch( 'https://api.worldbank.org/v2/en/country/all/indicator/SP.POP.TOTL', {'MRNEV': 1}): if row['countryiso3code']: _pops[row['countryiso3code']] = row['value'] # Then fetch the the country list _countries = {} countOfSmallCountries = 0 countOfRichCountries = 0 country_meta = {} for elem in wbgapi.fetch('https://api.worldbank.org/v2/en/country'): if config['--bssi'] and elem['id'] not in bssi_countries: continue if config['--income'] and (elem['incomeLevel']['id'] == config['--income']) != incomeFlag:
def coder(name, debug=None):
    '''Return the country code for a given country name, based on common
    spellings and conventions. This function is intended to make it easier
    to convert country names to ISO3 codes.

    This feature is English-only and still in development. You can extend
    the matching algorithm by editing the `lookup-data.yaml` file.

    Arguments:
        name:   a country name as a string, or an iterable object of name strings

        debug:  a list of ISO codes for which to print debug output

    Returns:
        If `name` is a string then the function returns the corresponding
        ISO3 code, or None if the code can't be ascertained.

        If `name` is an iterable object, the function returns a `w.Coder`
        mapping of the country names (passed as arguments) to the
        corresponding ISO3 codes. Country names that cannot be ascertained
        have a value of None.

    Examples:
        print(wbgapi.economy.coder('Eswatini')) # prints 'SWZ'

        print(wbgapi.economy.coder('Swaziland')) # prints 'SWZ'

        print(wbgapi.economy.coder(['Canada', 'Toronto'])) # prints {'Canada': 'CAN', 'Toronto': None}
    '''
    global _lookup_data

    def prepare(s, clean=False, magicRegex=False):
        # Normalize a name or pattern so both sides of a comparison agree.
        s = s.lower()
        if clean:
            # clean should be False if the string is regex-capable

            # this next trick strips the container parentheses from
            # "... (US|UK)" and leaves the inner part. Needed for the Virgin
            # Islands; must run before parenthetical text is removed entirely
            s = re.sub(r'\((u\.?s\.?|u\.?k\.?)\)',
                       lambda t: t.group(1).replace('.', ''), s)

            s = re.sub(r'\s*\(.*\)', '', s)    # remove parenthetical text
            s = s.replace("'", '')             # remove apostrophes
            s = re.sub(r'[^a-z0-9&]', ' ', s)  # convert remaining superfluous chars to spaces

        s = s.strip()
        if magicRegex:
            # converts 'and' to (and|&), 'st' to (st|saint)
            s = re.sub(r'\band\b', r'(and|\&)', s)
            s = re.sub(r'\bst\b', r'(st|saint)', s)
            s = re.sub(r'\s+', r'\\s+', s)

        return s

    def as_regex(text, clean):
        # regex that can match on any word boundary
        return '\\b{}\\b'.format(prepare(text, clean=clean, magicRegex=True))

    if _lookup_data is None:
        # Build into a local list and publish it only once complete, so a
        # failure part-way through never leaves a partial cache behind.
        lookup = []

        yaml_path = os.path.join(os.path.dirname(__file__), 'lookup-data.yaml')
        with open(yaml_path, 'r') as stream:
            # an empty yaml file loads as None
            user_data = yaml.safe_load(stream) or {}

        for row in w.fetch('country/all', lang='en'):
            if row['region']['id'] == 'NA':
                continue  # ignore aggregates

            code = row['id']
            obj = user_data.get(code, {})

            # convert ordinary arrays to objects - for most cases this simplifies the yaml
            if isinstance(obj, list):
                obj = {'patterns': obj}

            try:
                order = obj.get('order', 10)
            except Exception:
                # show the malformed yaml entry before propagating the error
                print(obj)
                raise

            # each entry is (pattern, code, is_regex, order)
            lookup.append((code.lower(), code, False, order))
            lookup.append((as_regex(row['name'], clean=True), code, True, order))
            for user_pattern in obj.get('patterns', []):
                if user_pattern[0:1] == ':':
                    # treat as an exact case-insensitive string match
                    lookup.append((user_pattern[1:].lower(), code, False, order))
                else:
                    # treat as a regex string which can match on any word boundary
                    lookup.append((as_regex(user_pattern, clean=False), code, True, order))

        # stable sort: entries with equal order keep their insertion order
        lookup.sort(key=lambda x: x[3])
        _lookup_data = lookup

    if isinstance(name, str):
        name = [name]
        is_list = False
    else:
        is_list = True

    results = w.Coder({k: None for k in name})

    for t in name:
        t2 = prepare(t, clean=True, magicRegex=False)
        for pattern, code, is_regex, order in _lookup_data:
            if debug and code in debug:
                print('{}: matching "{}"/{} against "{}"'.format(
                    code, pattern, is_regex, t2))

            if is_regex:
                matched = re.search(pattern, t2)
            else:
                matched = pattern == t2

            if matched:
                results[t] = code
                break

    if is_list:
        return results

    return results.get(name[0])
def coder(name, summary=False, debug=None):
    '''Return the country code for a given country name, based on common
    spellings and conventions. This function is intended to make it easier
    to convert country names to ISO3 codes.

    This feature is English-only and still in development. You can extend
    the matching algorithm by editing the `lookup-data.yaml` file.

    Arguments:
        name:       a country name as a string, or an iterable object of name strings

        summary:    just return anomalies (names that couldn't be matched or that don't match the WBG name).

        debug:      a list of ISO codes for which to print debug output

    Returns:
        If `name` is a string then the function returns the corresponding
        ISO3 code, or None if the code can't be ascertained.

        If `name` is a pandas Series, the function returns a pandas Series
        (named 'iso3') with the same index.

        If `name` is any other iterable object, the function returns a
        Coder object. Coder is a dict subclass with some sugar to produce a
        nice command line (or jupyter notebook) report. Country names that
        cannot be coded have a value of None.

        Note that if summary is True then the function ALWAYS returns a
        Coder object.

    Examples:
        print(wbgapi.economy.coder('Eswatini')) # prints 'SWZ'

        print(wbgapi.economy.coder('Swaziland')) # prints 'SWZ'

        print(wbgapi.economy.coder(['Canada', 'Toronto'])) # prints {'Canada': 'CAN', 'Toronto': None}
    '''
    global _lookup_data, _coder_names

    # match modes stored in each lookup entry
    EXACT, REGEX, EXCLUDE = 0, 1, 2

    def prepare(s, clean=False, magicRegex=False):
        # Normalize a name or pattern so both sides of a comparison agree.
        s = s.lower()
        if clean:
            # clean should be False if the string is regex-capable

            # this next trick strips the container parentheses from
            # "... (US|UK)" and leaves the inner part. Needed for the Virgin
            # Islands; must run before parenthetical text is removed entirely
            s = re.sub(r'\((u\.?s\.?|u\.?k\.?)\)',
                       lambda t: t.group(1).replace('.', ''), s)

            s = re.sub(r'\s*\(.*\)', '', s)    # remove parenthetical text
            s = s.replace("'", '')             # remove apostrophes
            s = re.sub(r'[^a-z0-9&]', ' ', s)  # convert remaining superfluous chars to spaces

        s = s.strip()
        if magicRegex:
            # converts 'and' to (and|&), 'st' to (st|saint)
            s = re.sub(r'\band\b', r'(and|\&)', s)
            s = re.sub(r'\bst\b', r'(st|saint)', s)
            s = re.sub(r'\s+', r'\\s+', s)

        return s

    def as_regex(text, clean):
        # regex that can match on any word boundary
        return '\\b{}\\b'.format(prepare(text, clean=clean, magicRegex=True))

    if _lookup_data is None:
        # Build into locals and publish them only once complete, so a
        # failure part-way through never leaves partial caches behind.
        lookup = []
        names = {}

        yaml_path = os.path.join(os.path.dirname(__file__), 'lookup-data.yaml')
        with open(yaml_path, 'r') as stream:
            # an empty yaml file loads as None
            user_data = yaml.safe_load(stream) or {}

        for row in w.fetch('country/all', lang='en'):
            if row['region']['id'] == 'NA':
                continue  # ignore aggregates

            code = row['id']
            names[code] = row['name']
            obj = user_data.get(code, {})

            # convert ordinary arrays to objects - for most cases this simplifies the yaml
            if isinstance(obj, list):
                obj = {'patterns': obj}

            try:
                order = obj.get('order', 10)
            except Exception:
                # show the malformed yaml entry before propagating the error
                print(obj)
                raise

            # each entry is (pattern, code, mode, order)
            lookup.append((code.lower(), code, EXACT, order))
            lookup.append((as_regex(row['name'], clean=True), code, REGEX, order))
            for user_pattern in obj.get('patterns', []):
                prefix = user_pattern[0:1]
                if prefix == ':':
                    # treat as an exact case-insensitive string match
                    lookup.append((user_pattern[1:].lower(), code, EXACT, order))
                elif prefix == '~':
                    # treat as regex string, but EXCLUDE this pattern
                    lookup.append((as_regex(user_pattern[1:], clean=False), code, EXCLUDE, order))
                else:
                    # treat as a regex string which can match on any word boundary
                    lookup.append((as_regex(user_pattern, clean=False), code, REGEX, order))

        # stable sort: entries with equal order keep their insertion order
        lookup.sort(key=lambda x: x[3])
        _coder_names = names
        _lookup_data = lookup

    if isinstance(name, str):
        name = [name]
        is_list = False
    else:
        is_list = True

    # a Series is returned only for Series input and only when not summarizing
    as_series = (not summary and pd is not None
                 and isinstance(name, pd.Series))
    if as_series:
        results = pd.Series(index=name.index, dtype=object, name='iso3')
    else:
        results = w.Coder({k: None for k in name})

    for n, t in enumerate(name):
        excludes = set()
        t2 = prepare(t, clean=True, magicRegex=False)
        for pattern, code, mode, order in _lookup_data:
            verbose = debug and code in debug
            if verbose:
                print('{}: matching "{}"/{} against "{}"'.format(
                    code, pattern, mode, t2))

            if code in excludes:
                if verbose:
                    print('{}: excluded'.format(code))
                continue

            if mode == EXCLUDE:
                if re.search(pattern, t2):
                    # all further patterns for this code will be ignored
                    excludes.add(code)
                continue

            if mode == REGEX:
                matched = re.search(pattern, t2)
            else:
                matched = mode == EXACT and pattern == t2

            if matched:
                if as_series:
                    results.iloc[n] = code  # positional, so any index works
                else:
                    results[t] = code
                break

    if not (is_list or summary):
        return results.get(name[0])

    if summary and not as_series:
        # keep only anomalies: unmatched names, or names that differ from the WBG name
        results = w.Coder({
            k: v for k, v in results.items()
            if not v or k.lower() != _coder_names.get(v, '').lower()
        })

    return results