def eez(country_shapes):
    """Return the simplified offshore zones that touch the configured countries.

    The shapes are read from ``snakemake.input.eez`` and restricted to the
    countries listed in ``snakemake.config['countries']`` (ISO2 codes, which
    are converted to ISO3 to match the ``ISO_3digit`` column).

    Parameters
    ----------
    country_shapes : mapping
        Country geometries indexed by ISO2 code; used to discard zones that
        are not adjacent to their own country.

    Returns
    -------
    gpd.GeoSeries
        Simplified geometries indexed by ISO2 code, keeping only those whose
        distance to the matching entry of ``country_shapes`` is below 1e-3.
    """
    cntries = snakemake.config['countries']
    cntries3 = frozenset(countrycode(cntries, origin='iso2c', target='iso3c'))

    df = gpd.read_file(snakemake.input.eez)
    df = df.loc[df['ISO_3digit'].isin(cntries3)]
    df['name'] = countrycode(df['ISO_3digit'], origin='iso3c', target='iso2c')

    simplified = df.set_index('name').geometry.map(
        lambda geom: _simplify_polys(geom, filterremote=False)
    )

    # ``Series.iteritems`` was removed in pandas 2.0; ``items`` is the
    # equivalent spelling and is available in older releases as well.
    return gpd.GeoSeries({
        name: geom
        for name, geom in simplified.items()
        if geom.distance(country_shapes[name]) < 1e-3
    })
def parse_Geoposition(loc, country=None, return_Country=False):
    """
    Nominatim request for the geoposition of a specific location in a country.

    Returns a tuple ``(latitude, longitude)`` if the request was successful,
    returns None otherwise.

    ToDo:   There exist further online sources for lat/long data which could
            be used if this one fails, e.g.
        - Google Geocoding API
        - Yahoo! Placefinder
        - https://askgeo.com (??)

    Parameters
    ----------
    loc : string
        description of the location, can be city, area etc.
    country : string
        name of the country which will be used as a bounding area
    return_Country : bool
        if True, return the last comma-separated component of the geocoder's
        address string instead of the coordinates

    Returns
    -------
    tuple, string or None
        ``(latitude, longitude)``, the trailing address component when
        ``return_Country`` is set, or None when ``loc`` is missing or the
        geocoder finds nothing.
    """
    # The original test was ``loc != float``, which compares against the type
    # object and is therefore always true.  The intent is to skip float
    # placeholders (e.g. NaN for a missing value), so test the instance.
    if loc is None or isinstance(loc, float):
        return None

    # Imported lazily so that skipped inputs do not require geopy.
    from geopy.geocoders import Nominatim

    country = countrycode(codes=[country], origin='country_name',
                          target='iso2c')[0]
    # NOTE(review): recent geopy releases appear to have replaced the
    # ``country_bias`` constructor argument with ``country_codes`` on
    # ``geocode()`` -- confirm against the pinned geopy version.
    gdata = Nominatim(timeout=500, country_bias=country).geocode(loc)
    if gdata is None:
        return None

    if return_Country:
        return gdata.address.split(', ')[-1]
    return (gdata.latitude, gdata.longitude)
def get_country(self, row, path="#country", return_default=True):
    """Resolve the upper-cased country value for ``row``.

    Lookup order:

    1. ``row[path + "+identifier"]`` when that key exists and is truthy.
    2. The value stored under ``path``: taken from ``self.country_cache`` if
       already seen, otherwise converted with ``countrycode`` to ISO2. When
       the conversion does not give a two-character result, the cleaned raw
       value (``self.clean_string``) is used instead. Either outcome is
       cached under the raw value.
    3. ``self.default_country`` when ``return_default`` is true, otherwise the
       literal ``"unknown"``.
    """
    identifier_key = path + "+identifier"
    country = ""
    if identifier_key in row.keys():
        country = row[identifier_key]
        if country:
            return country.upper()

    # No usable identifier: fall back to the default unless a name is present.
    if not (len(country) < 2 and path in row.keys()):
        fallback = self.default_country if return_default else "unknown"
        return fallback.upper()

    raw_name = row.get(path, "xx")
    if raw_name in self.country_cache.keys():
        return self.country_cache[raw_name].upper()

    country = countrycode(codes=[row.get(path, "")], origin='country_name', target="iso2c")[0]
    if len(country) != 2:
        # Conversion did not produce an ISO2 code; keep a cleaned raw value.
        country = self.clean_string(raw_name).strip()
    self.country_cache[raw_name] = country
    return country.upper()
def eez(subset=None, filter_remote=True, tolerance=0.03):
    """Read simplified shapes from the World EEZ shapefile for ``subset``.

    Parameters
    ----------
    subset : iterable of ISO2 country codes
        Countries to keep; converted to ISO3 to match the ``ISO_3digit``
        property of the shapefile records.
    filter_remote : bool
        If true, keep only shapes whose distance to the matching entry of
        ``countries(subset)`` is below 1e-3 and sort the result by index.
    tolerance : float
        Passed through to ``simplify_poly``.

    Returns
    -------
    pd.Series
        Shapes indexed by ISO2 country code.
    """
    countries3 = frozenset(countrycode(subset, origin='iso2c', target='iso3c'))

    iso3_codes = []
    geometries = []
    # NOTE(review): ``fiona.drivers()`` is reported as deprecated in favour of
    # ``fiona.Env()`` in newer Fiona releases -- confirm before upgrading.
    with fiona.drivers(), fiona.open(toDataDir('World_EEZ/World_EEZ_v8_2014.shp')) as f:
        for feature in f:
            iso3 = feature['properties']['ISO_3digit']
            if iso3 not in countries3:
                continue
            iso3_codes.append(iso3)
            geometries.append(simplify_poly(shape(feature['geometry']), tolerance=tolerance))

    names = countrycode(iso3_codes, origin='iso3c', target='iso2c')
    if not filter_remote:
        return pd.Series(geometries, index=names)

    country_shapes = countries(subset)
    nearby = {
        name: geom
        for name, geom in zip(names, geometries)
        if geom.distance(country_shapes[name]) < 1e-3
    }
    return pd.Series(nearby).sort_index()
def get_ranges(self):
    """Download the aggregated zone file for ``self.country`` from ipdeny.com.

    The country name is converted to a lower-cased ISO2 code to build the URL.
    On success ``self.ranges`` holds the non-blank lines of the response and
    True is returned. If the response text contains ``'title'``,
    ``self.ranges`` is set to an empty list and False is returned.
    """
    iso2 = countrycode.countrycode(codes=[self.country],
                                   origin='country_name',
                                   target='iso2c')[0]
    url = 'http://www.ipdeny.com/ipblocks/data/aggregated/{}-aggregated.zone'.format(iso2.lower())
    body = requests.get(url).text

    # Presumably an HTML (error) page rather than a plain zone list.
    if 'title' in body:
        self.ranges = []
        return False

    self.ranges = [line for line in body.split('\n') if line.strip()]
    return True
def normalizeCountry(country_str, target="iso3c", title_case=False):
    '''Return a normalized name/code for the country in ``country_str``.

    The input can be a code or a name; ``target`` selects the output form.
    The 3 character ISO code is the default (``iso3c``); ``country_name`` and
    ``iso2c`` are common too. See ``countrycode.countrycode`` for details and
    other options.

    Returns an empty string for ``None`` input. Raises ``ValueError`` if the
    country is unrecognized.'''
    if country_str is None:
        return u''

    # Two and three character inputs are first tried as ISO codes.
    code_origin = {2: "iso2c", 3: "iso3c"}.get(len(country_str))
    cc = None
    if code_origin is not None:
        cc = countrycode(country_str.upper(), origin=code_origin, target=target)
    if not cc:
        cc = countrycode(country_str, origin="country_name", target=target)

    # Still need to validate because the country-name lookup returns whatever
    # was passed in when no match is found.
    if cc:
        cc = countrycode(cc, origin=target, target=target)
    else:
        cc = None

    if not cc:
        raise ValueError("Country not found: %s" % (country_str))
    if title_case:
        return cc.title()
    return cc
def normalizeCountry(country_str, target="iso3c", title_case=False):
    """Normalize ``country_str`` to the representation named by ``target``.

    ``country_str`` may be a country code or a country name. ``target``
    defaults to the 3 character ISO code (``iso3c``); ``country_name`` and
    ``iso2c`` are also common. See ``countrycode.countrycode`` for the full
    list of options.

    ``None`` yields an empty string. An unrecognized country raises
    ``ValueError``. With ``title_case`` the result is passed through
    ``str.title``.
    """
    raw = "country_name"

    if country_str is None:
        return ''

    size = len(country_str)
    if size in (2, 3):
        # Try the input as an ISO code first, then as a free-form name.
        origin = "iso2c" if size == 2 else "iso3c"
        cc = (countrycode(country_str.upper(), origin=origin, target=target)
              or countrycode(country_str, origin=raw, target=target))
    else:
        cc = countrycode(country_str, origin=raw, target=target)

    # Validation pass: the name lookup hands back its input unchanged when
    # nothing matches, so round-trip the result through the target scheme.
    cc = countrycode(cc, origin=target, target=target) if cc else None
    if not cc:
        raise ValueError("Country not found: %s" % (country_str))

    return cc.title() if title_case else cc
def get_country_code_from_name(self, name):
    """Return the upper-cased code for ``name``, caching new lookups.

    Cached names are served from ``self.country_cache``. Otherwise the name is
    converted with ``countrycode`` to ISO2. If the result is not two
    characters long it is replaced by ``random_string()``. The upper-cased
    outcome is stored in the cache either way.
    """
    try:
        country = self.country_cache[name]
    except KeyError:
        country = countrycode(codes=[name], origin='country_name', target="iso2c")[0]
        if len(country) != 2:
            # Not an ISO2 code: substitute a generated placeholder.
            country = random_string()
        self.country_cache[name] = country.upper()
    return country.upper()
def country_input(require_present=True, string=None):
    """Prompt until the user names a usable delegation and return its index.

    The entered text is converted with ``countrycode`` from a country name to
    an upper-cased ISO2 code and compared with ``country_code`` of each entry
    in ``state.delegations``.

    Parameters
    ----------
    require_present : bool
        If true, a matching delegation is only accepted when its ``present``
        attribute is truthy; otherwise the user is asked again.
    string : str, optional
        Prompt text; defaults to ``"Enter the delegation's name: "``.

    Returns
    -------
    int
        Position of the matching delegation in ``state.delegations``.
    """
    prompt = "Enter the delegation's name: " if string is None else string

    while True:
        code = countrycode.countrycode(codes=[input(prompt)],
                                       origin='country_name',
                                       target='iso2c')[0].upper()
        # First delegation with a matching code, if any.
        match = next((d for d in state.delegations if d.country_code == code), None)

        if match is None:
            print("Delegation not found, try the country code?")
        elif not require_present or match.present:
            return state.delegations.index(match)
        else:
            print("Delegation is not present.")
def parse_dataset(data, private=True):
    '''
    Build the metadata and resource dictionaries for one WorldPop dataset.

    ``data`` must provide the keys ``'Dataset Title'``, ``'URL_direct'``,
    ``'Location'``, ``'Description'``, ``'URL_summaryPag'`` and ``'Source'``.
    ``private`` is copied into the metadata unchanged.

    Returns a dict with the keys ``'metadata'`` and ``'resource'``.
    '''
    # NOTE: an earlier guard that raised ValueError when
    # data.get('worldPopData') was None is disabled in the original.
    title = data['Dataset Title']
    location = data['Location']
    slug = str(slugify(title))[:90]  # shared by the package id and the name
    iso3 = countrycode(codes=str(location), origin='country_name', target='iso3c').lower()

    resource = {
        "package_id": slug,
        "url": data['URL_direct'],
        "name": location + '.zip',
        "format": 'zip',
        "description": None,
    }

    metadata = {
        'name': slug,
        'title': str(title),
        'owner_org': 'worldpop',
        'author': 'andytatem',
        'author_email': '*****@*****.**',
        'maintainer': 'andytatem',
        'maintainer_email': '*****@*****.**',
        'license_id': 'cc-by-sa',
        'dataset_date': None,  # has to be MM/DD/YYYY
        'subnational': 1,  # has to be 0 or 1. Default 1 for WorldPop.
        'notes': data['Description'],
        'caveats': None,
        'methodology': 'Other',
        'methodology_other': 'For more information about methods, please refer to ' + data['URL_summaryPag'],
        'dataset_source': data['Source'],
        'package_creator': 'luiscape',
        'private': private,  # has to be True or False
        'url': None,
        'state': 'active',  # always "active".
        'tags': [{'name': 'Map'}, {'name': 'Population'}],  # list of { 'name': ... }
        'groups': [{'id': iso3}],  # has to be ISO-3-letter-code. { 'id': ... }
    }

    return {'metadata': metadata, 'resource': resource}
def __init__(self, country_code):
    """Set up a delegation for ``country_code`` with all tallies at zero.

    The code is converted with ``countrycode`` from ISO2 to a country name.
    An ``Exception`` is raised if that lookup yields ``None``. ``veto`` is
    true when the code is listed in ``config['committee']['veto']``.
    """
    self.country_code = country_code
    names = countrycode.countrycode(codes=[country_code],
                                    origin='iso2c',
                                    target='country_name')
    self.country = names[0]
    if self.country is None:
        raise Exception("Invalid country code '" + country_code + "'")

    self.veto = country_code in config['committee']['veto']

    # Per-delegation counters, all starting at zero.
    for counter in ('speech_time', 'poi_time', 'poi_answer_time',
                    'motions_raised', 'pois_raised', 'amendments_made'):
        setattr(self, counter, 0)

    # keeps track of votes for, against and abstentions
    self.votes = [0, 0, 0]
    self.veto_used = 0
    self.present = False
    self.no_abstentions = False
def collect_countries():
    '''
    Collect country names, download links and ISO3 codes from ACLED's website.

    The page is fetched with ``requests`` and parsed with BeautifulSoup. The
    children of the eighth ``<ul>`` element (index 7) are scanned and every
    child of length 2 becomes one entry.

    Returns a list of dicts with the keys ``'name'``, ``'url'`` and ``'iso'``.
    '''
    u = 'http://www.acleddata.com/data/version-6-data-1997-2015/'
    level = 7

    response = requests.get(u)
    soup = BeautifulSoup(response.content, 'html.parser')
    lists = soup.findAll('ul')

    countries = []
    for item in lists[level]:
        if len(item) != 2:
            continue
        # Link text carries a " (xls)" suffix that is not part of the name.
        name = item.get_text().replace(' (xls)', '')
        countries.append({
            'name': name,
            'url': item.findAll('a')[0].get('href'),
            'iso': countrycode(name, 'country_name', 'iso3c').lower(),
        })

    return countries
def get_eia_annual_hydro_generation(fn=None):
    """Load the EIA annual hydro generation table, indexed by ISO2 country.

    Parameters
    ----------
    fn : str, optional
        Path of the CSV file; defaults to
        ``Hydro_Inflow/EIA_hydro_generation_2000_2014.csv`` in the data
        directory.

    Returns
    -------
    pd.DataFrame
        Transposed table (one column per country) scaled by 1e6, i.e. in
        MWh/a given that the file is in billion kWh/a = TWh/a.
    """
    if fn is None:
        fn = toDataDir('Hydro_Inflow/EIA_hydro_generation_2000_2014.csv')

    # in billion KWh/a = TWh/a
    raw = pd.read_csv(fn, skiprows=4, index_col=1, na_values=[u' ', '--'])
    eia_hydro_gen = raw.drop(['Unnamed: 0', 'Unnamed: 2'], axis=1).dropna(how='all')

    countries_iso2c = countrycode(eia_hydro_gen.index.values,
                                  origin='country_name', target='iso2c')
    eia_hydro_gen.index = pd.Index(countries_iso2c, name='countries')
    # Presumably 'Kosovo' is left unconverted by countrycode; map it by hand.
    eia_hydro_gen = eia_hydro_gen.rename(index={'Kosovo': 'KV'})

    return eia_hydro_gen.T * 1e6  # in MWh/a
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame, who_coding: pd.DataFrame):
    """
    Apply transformations to CDC_ITF records.

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.

    Returns
    -------
    dict
        Record with transformations applied.

    """
    # 1. Join comments in ``Concise Notes`` and ``Notes`` columns
    comments = join_comments(record)

    # 2. Create a new blank record and fill it from the old one via key_ref
    record = utils.apply_key_map(utils.generate_blank_record(), record, key_ref)

    # 3. Assign merged comments to new record
    record['comments'] = comments

    # 4. If area_covered is national, set to blank
    record = area_covered_national(record)

    # 5. Handle date formatting
    record = utils.parse_date(record)

    # 6. Assign date_end with measure_stage value
    record = add_date_end(record)

    # 7. Manual value changes, applied in order as (field, old, new).
    #    The 'Curaçao' entry used to be issued three times in a row; once is
    #    enough because the value no longer matches after the first pass.
    manual_changes = (
        # country names
        ('country_territory_area', 'Saint Martin', 'French Saint Martin'),
        ('country_territory_area', 'Réunion', 'Reunion'),
        ('country_territory_area', 'Curaçao', 'Curacao'),
        ('country_territory_area', 'St. Barts', 'Saint Barthelemy'),
        ('country_territory_area', 'Czechia', 'Czech Republic'),
        ('country_territory_area', 'D. P. R. of Korea', 'North Korea'),
        ('country_territory_area', 'Eswatini', 'Swaziland'),
        ('country_territory_area', 'South Korea', 'Korea'),
        # NOTE(review): 'Carribean' spelling kept as in the original --
        # presumably it matches the downstream reference table; confirm.
        ('country_territory_area', 'Bonaire, Saint Eustatius and Saba', 'Carribean Netherlands'),
        # measure_stage names
        ('measure_stage', 'Impose', 'new'),
        ('measure_stage', 'Lift', 'phase-out'),
        ('measure_stage', 'Pause', 'modification'),
        ('measure_stage', 'Ease', 'modification'),
        ('measure_stage', 'Strengthen', 'modification'),
        # non_compliance_penalty names
        ('non_compliance_penalty', 'Yes', 'not known'),
        ('non_compliance_penalty', 'Yes ', 'not known'),
        ('non_compliance_penalty', 'yes ', 'not known'),
        ('non_compliance_penalty', 'yes', 'not known'),
        ('non_compliance_penalty', 'No', None),
        ('non_compliance_penalty', "No'", None),
    )
    for field, old, new in manual_changes:
        record = utils.replace_conditional(record, field, old, new)

    # 8. replace sensitive country names
    record = utils.replace_sensitive_regions(record)

    # 9. assign ISO code
    record['iso'] = countrycode(codes=record['country_territory_area'], origin='country_name', target='iso3c')

    # 10. check missing ISO
    check.check_missing_iso(record)

    # 11. Join WHO accepted country names (shared)
    record = utils.assign_who_country_name(record, country_ref)

    # 12. Join who coding from lookup (shared)
    record = utils.assign_who_coding(record, who_coding)

    # 13. check for missing WHO codes (shared)
    check.check_missing_who_code(record)

    # 14. normalise admin_level values, 15. replace measure_stage extension
    late_changes = (
        ('admin_level', 'Subnational/regional only', 'other'),
        ('admin_level', 'subnational/regional only', 'other'),
        ('admin_level', 'National', 'national'),
        ('measure_stage', 'Extend with same stringency', 'extension'),
    )
    for field, old, new in late_changes:
        record = utils.replace_conditional(record, field, old, new)

    # 16. Add WHO PHSM admin_level values
    record = utils.add_admin_level(record)

    record = utils.remove_tags(record, ['comments', 'link', 'alt_link'])

    return record
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame, who_coding: pd.DataFrame, prov_measure_filter: pd.DataFrame):
    """
    Apply transformations to JH_HIT records.

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.
    prov_measure_filter : pd.DataFrame
        Reference for filtering by `prov_measure` values.

    Returns
    -------
    dict or None
        Record with transformations applied. None when both ``locality`` and
        ``usa_county`` are null, or when the `prov_measure` filter drops the
        record.

    """
    # 1. Skip records that have neither a locality nor a county
    if pd.isnull(record['locality']) and pd.isnull(record['usa_county']):
        return None

    # 2./3. Blank record with the correct keys, filled from the old record
    #       using the column reference (shared)
    record = utils.apply_key_map(utils.generate_blank_record(), record, key_ref)

    # 4. Filter by prov_measure; the filter signals a drop with None
    record = apply_prov_measure_filter(record, prov_measure_filter)
    if record is None:
        return None

    # 5. Handle date - infer format (shared)
    record = utils.parse_date(record)

    # 8. replace sensitive country names by ISO (utils)
    record = utils.replace_sensitive_regions(record)

    # 9. assign ISO code
    record['iso'] = countrycode(codes=record['country_territory_area'], origin='country_name', target='iso3c')

    # 10. check for missing ISO codes (shared)
    check.check_missing_iso(record)

    # 11. Join WHO accepted country names (shared)
    record = utils.assign_who_country_name(record, country_ref)

    # 12. Join who coding from lookup (shared)
    record = utils.assign_who_coding(record, who_coding)

    # 13. check for missing WHO codes (shared)
    check.check_missing_who_code(record)

    # 14. Value replacements, applied in order as (field, old, new)
    value_changes = (
        # admin_level values
        ('admin_level', '', 'unknown'),
        ('admin_level', 'Yes', 'national'),
        ('admin_level', 'No', 'state'),
        # JH enforcement == 'unknown' becomes None
        ('enforcement', 'unknown', None),
        # JH targeted values
        ('targeted', 'geographic subpobulation', None),
        ('targeted', 'entire population', None),
    )
    for field, old, new in value_changes:
        record = utils.replace_conditional(record, field, old, new)

    # 15. fill_not_enough_to_code
    record = fill_not_enough_to_code(record)

    # 16. replace unknown non_compliance_penalty
    record = utils.replace_conditional(record, 'non_compliance_penalty', 'unknown', 'Not Known')

    record = utils.remove_tags(record)
    record = blank_record_and_url(record)

    return record
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame, who_coding: pd.DataFrame):
    """
    Apply transformations to EURO records.

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.

    Returns
    -------
    dict
        Record with transformations applied.

    """
    # 1./2. Create a blank record and fill it from the old record via key_ref
    record = utils.apply_key_map(utils.generate_blank_record(), record, key_ref)

    # 3. Manual value changes, applied in order as (field, old, new).
    #    Codes appear both as numbers and as strings in the source data.
    value_changes = (
        # country names
        ('country_territory_area', 'Kosovo*', 'Kosovo'),
        ('country_territory_area', 'Bewlgium', 'Belgium'),
        ('country_territory_area', 'Luxemburg', 'Luxembourg'),
        # enforcement values
        ('enforcement', ' ', 'not known'),
        ('enforcement', 0, 'not applicable'),
        ('enforcement', '0', 'not applicable'),
        ('enforcement', 1, 'recommended'),
        ('enforcement', '1', 'recommended'),
        ('enforcement', '2', 'required'),
        ('enforcement', 2, 'required'),
        ('enforcement', 3, 'monitored'),
        # measure_stage values
        ('measure_stage', 1, 'new'),
        ('measure_stage', '1', 'new'),
        ('measure_stage', 2, 'modification'),
        ('measure_stage', '2', 'modification'),
        ('measure_stage', 3, 'phase out'),
        ('measure_stage', '3', 'phase out'),
    )
    for field, old, new in value_changes:
        record = utils.replace_conditional(record, field, old, new)

    # Change a who_code value based on measure_stage
    record = update_school_record(record)

    # Strip whitespace characters from coding
    for key in ('prov_category', 'prov_subcategory', 'prov_measure'):
        record[key] = record[key].strip()

    # 4. replace sensitive country names by ISO (utils)
    record = utils.replace_sensitive_regions(record)

    # 5. assign ISO code
    record['iso'] = countrycode(codes=record['country_territory_area'], origin='country_name', target='iso3c')

    # 6. check for missing ISO codes (shared)
    check.check_missing_iso(record)

    # 7. Join WHO accepted country names (shared)
    record = utils.assign_who_country_name(record, country_ref)

    # 8. Join who coding from lookup (shared)
    record = utils.assign_who_coding(record, who_coding)

    # 9. check for missing WHO codes (shared)
    check.check_missing_who_code(record)

    # 10. Add WHO PHSM admin_level values
    record = utils.add_admin_level(record)

    record = utils.remove_tags(record)

    return record
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame, who_coding: pd.DataFrame):
    """
    Apply transformations to ACAPS records.

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.

    Returns
    -------
    dict
        Record with transformations applied.

    """
    # 1./2. Create a blank record and fill it from the old record via key_ref
    record = utils.apply_key_map(utils.generate_blank_record(), record, key_ref)

    # Blank out area_covered when it is a single space
    if record['area_covered'] == ' ':
        record['area_covered'] = ''

    # shift areas that should be countries.
    record = utils.replace_country(record, 'Denmark', 'Greenland')

    # 3. Manual value changes, applied in order as (field, old, new)
    value_changes = (
        # country names
        ('country_territory_area', 'DRC', 'Democratic Republic of the Congo'),
        ('country_territory_area', 'CAR', 'Central African Republic'),
        ('country_territory_area', 'DPRK', 'North Korea'),
        ('country_territory_area', 'Eswatini', 'Swaziland'),
        # measure_stage
        ('measure_stage', 'Introduction / extension of measures', 'introduction / extension of measures'),
        ('measure_stage', 'Phase-out measure', 'phase-out'),
        # non_compliance_penalty
        ('non_compliance_penalty', 'Legal Action', 'legal action'),
        ('non_compliance_penalty', 'Legal action', 'legal action'),
        ('non_compliance_penalty', 'Legal', 'legal action'),
        ('non_compliance_penalty', 'Up to detention', 'up to detention'),
        ('non_compliance_penalty', 'Up to Detention', 'up to detention'),
        ('non_compliance_penalty', 'Arrest/Detention', 'arrest/detention'),
        ('non_compliance_penalty', 'Deportation', 'deportation'),
        ('non_compliance_penalty', 'Refusal to enter the country', 'refused entry'),
        ('non_compliance_penalty', 'Refusal to enter the Country', 'refused entry'),
        ('non_compliance_penalty', 'Refusal to Enter the Country', 'refused entry'),
        ('non_compliance_penalty', 'Other (add in comments)', 'not known'),
        ('non_compliance_penalty', 'Fines', 'fines'),
        ('non_compliance_penalty', 'Other', 'not known'),
        ('non_compliance_penalty', 'Not Available', 'not known'),
        ('non_compliance_penalty', 'Not available', 'not known'),
        ('non_compliance_penalty', 'Not available ', 'not known'),
        ('non_compliance_penalty', 'not available ', 'not known'),
        ('non_compliance_penalty', 'Not Applicable', 'not applicable'),
        ('non_compliance_penalty', 'Not applicable', 'not applicable'),
        # NOTE(review): this one maps to 'not known' while its siblings map
        # to 'not applicable' -- looks like a copy-paste slip; confirm.
        ('non_compliance_penalty', 'not applicable ', 'not known'),
        # targeted values
        ('targeted', 'checked', None),
        ('targeted', 'Checked', None),
        ('targeted', 'general', None),
        ('targeted', 'General', None),
    )
    for field, old, new in value_changes:
        record = utils.replace_conditional(record, field, old, new)

    # 4. replace sensitive country names by ISO (utils)
    record = utils.replace_sensitive_regions(record)

    # 5. assign ISO code
    record['iso'] = countrycode(codes=record['country_territory_area'], origin='country_name', target='iso3c')

    # 6. check for missing ISO codes (shared)
    check.check_missing_iso(record)

    # 7. Join WHO accepted country names (shared)
    record = utils.assign_who_country_name(record, country_ref)

    # 8. Join who coding from lookup (shared)
    record = utils.assign_who_coding(record, who_coding)

    # 9. check for missing WHO codes (shared)
    check.check_missing_who_code(record)

    # 10. Add WHO PHSM admin_level values
    record = utils.add_admin_level(record)

    record = utils.remove_tags(record)

    return record
def parse_dataset(data, private=True, fail_no_country=True):
    """
    Parse an IFPRI Dataverse dataset description.

    Parameters
    ----------
    data : dict
        Dataset description; must contain ``latestVersion`` with the
        ``citation`` and ``geospatial`` metadata blocks and a ``files`` list.
    private : bool
        Copied into the ``private`` metadata field.
    fail_no_country : bool
        If true, a geographic coverage entry without a ``country`` raises
        ``ValueError``; otherwise such entries are skipped.

    Returns
    -------
    dict
        ``{"metadata": ..., "resources": [...]}`` where ``resources`` holds
        one entry per file with an accepted extension.

    Raises
    ------
    ValueError
        If ``latestVersion`` or the ``geospatial`` block is missing, or a
        country entry is missing while ``fail_no_country`` is true.
    """
    #
    # Check that there is actually metadata to parse.
    #
    latest = data.get("latestVersion")
    if latest is None:
        raise ValueError("No data to parse.")

    blocks = latest["metadataBlocks"]
    if blocks.get("geospatial") is None:
        raise ValueError("No country entry found.")

    metadata = {
        "name": None,
        "title": None,
        "owner_org": "ifpri",
        "author": "ifpridata",
        "author_email": "*****@*****.**",
        "maintainer": "ifpridata",
        "maintainer_email": "*****@*****.**",
        "license_id": "cc-by-sa",
        "dataset_date": None,  # has to be MM/DD/YYYY
        "subnational": 1,  # has to be 0 or 1. Default 1 for IFPRI.
        "notes": None,
        "caveats": None,
        "data_update_frequency": "0",
        "methodology": "Other",
        "methodology_other": None,
        "dataset_source": "",
        "package_creator": "luiscape",
        "private": private,  # has to be True or False
        "url": None,
        "state": "active",  # always "active".
        "tags": [{"name": "Food"}, {"name": "Security"}],  # list of { 'name': ... }
        "groups": [],  # has to be ISO-3-letter-code. { 'id': ... }
    }

    #
    # Citation block: name, title, dataset_date, source (authors), notes.
    #
    for field in blocks["citation"]["fields"]:
        type_name = field.get("typeName")
        if type_name == "title":
            metadata["title"] = str(field["value"])
            metadata["name"] = str(slugify(field["value"]))[:90]
        elif type_name == "timePeriodCovered":
            # When several periods are listed, the last one wins.
            for period in field["value"]:
                start = period.get("timePeriodCoveredStart")
                metadata["dataset_date"] = str(start["value"]) if start is not None else ""
        elif type_name == "author":
            authors = [
                author["authorName"].get("value")
                for author in field["value"]
                if author["authorName"].get("value") is not None
            ]
            metadata["dataset_source"] = ", ".join(authors)
        elif type_name == "dsDescription":
            metadata["notes"] = str(field.get("value")[0].get("dsDescriptionValue").get("value"))

    #
    # Geospatial block: groups (countries as lower-case ISO3 codes).
    #
    for location in blocks["geospatial"]["fields"]:
        if location.get("typeName") != "geographicCoverage":
            continue
        for country in location["value"]:
            if country.get("country") is None:
                if fail_no_country:
                    raise ValueError("No country entry found.")
                continue
            name = country["country"].get("value")
            code = countrycode(codes=str(name), origin="country_name", target="iso3c")
            metadata["groups"].append({"id": code.lower()})

    #
    # Files: keep only data files with an accepted extension. Each resource
    # is built as a fresh dict instead of mutating and copying a shared one.
    #
    desired_file_extensions = {"xls", "xlsx", "csv", "zip", "tsv", "shp", "geojson", "json"}
    resources = []
    for entry in latest["files"]:
        file_name = entry.get("datafile").get("name")
        if file_name is None:
            continue
        extension = os.path.splitext(file_name)[1][1:].lower()
        if extension not in desired_file_extensions:
            continue
        resources.append({
            "package_id": metadata["name"],
            "url": "https://dataverse.harvard.edu/api/access/datafile/" + str(entry["datafile"].get("id")),
            "name": file_name,
            "format": extension.upper(),
            "description": None,
        })

    return {"metadata": metadata, "resources": resources}
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame, who_coding: pd.DataFrame, no_update_phrase: pd.DataFrame):
    """
    Apply transformations to OXCGRT records.

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.
    no_update_phrase : pd.DataFrame
        Reference for "no update" phrases.

    Returns
    -------
    dict or None
        Record with transformations applied. None for records whose
        ``prov_measure`` is "H8_Protection of elderly people" or whose
        ``prov_subcategory`` equals 0.

    """
    # 1./2. Blank record with the correct keys, filled from the old record
    #       using the column reference (shared)
    record = utils.apply_key_map(utils.generate_blank_record(), record, key_ref)

    # 3. Drop H8 records
    if record["prov_measure"] == "H8_Protection of elderly people":
        return None

    # 4. Handle date formatting
    record = utils.parse_date(record)

    # 5. replace sensitive country names
    record = utils.replace_sensitive_regions(record)

    # 6. shift areas that should be countries.
    record = utils.replace_country(record, 'United States', 'Virgin Islands')

    # 7. Manual country name changes, applied in order as (old, new)
    country_renames = (
        ('Virgin Islands', 'US Virgin Islands'),
        ('United States Virgin Islands', 'US Virgin Islands'),
        ('Eswatini', 'Swaziland'),
        ('South Korea', 'Korea'),
    )
    for old, new in country_renames:
        record = utils.replace_conditional(record, 'country_territory_area', old, new)

    # 8. assign ISO code
    record['iso'] = countrycode(codes=record['country_territory_area'], origin='country_name', target='iso3c')

    # 9. check missing ISO
    check.check_missing_iso(record)

    # 10. Remove records where there is no data in prov_subcategory
    if record['prov_subcategory'] == 0:
        return None

    # Removes information in flag variables for now
    record['prov_subcategory'] = int(record['prov_subcategory'])

    # 11. Join WHO accepted country names (shared)
    record = utils.assign_who_country_name(record, country_ref)

    record = financial_measures(record)

    # 12. Join who coding from lookup (shared)
    record = utils.assign_who_coding(record, who_coding)

    # 13. check for missing WHO codes (shared)
    check.check_missing_who_code(record)

    # 14. Add WHO PHSM admin_level values
    record = utils.add_admin_level(record)

    record = utils.remove_tags(record)

    # 15. Attach comment links
    record = assign_comment_links(record)

    # 16. Label records with "no update" phrases
    record = label_update_phrase(record, list(no_update_phrase['phrase']))

    return record