def testIssue49(self):
    '''
    Check that different spellings of a country resolve to one name.

    Every input text is first looked up with
    geograpy.get_geoPlace_context (the result is only printed in debug
    mode) and then with a PlaceContext, whose countries must equal the
    single expected country name.
    '''
    verbose = self.debug
    # (input text, expected country name) pairs
    cases = [
        ('United Kingdom', "United Kingdom"),
        ('UK', "United Kingdom"),
        ('Great Britain', "United Kingdom"),
        ('GB', "United Kingdom"),
        ('United States', "United States of America"),
    ]
    if verbose:
        print("lookup with geograpy.get_geoPlace_context")
    for text, _ in cases:
        countries = geograpy.get_geoPlace_context(text=text).countries
        if verbose:
            print(f"{text}:{countries}")
    if verbose:
        print("lookup with PlaceContext")
    for text, wanted in cases:
        pc = PlaceContext([text])
        pc.set_countries()
        if verbose:
            print(f"{text}:{pc.countries}")
        self.assertEqual([wanted], pc.countries)
def get_place_context(url=None, text=None):
    '''
    Build a PlaceContext from the places an Extractor finds.

    Args:
        url: handed to the Extractor as its url argument
        text: handed to the Extractor as its text argument

    Returns:
        the PlaceContext after its countries, regions, cities and other
        setters have been called in that order
    '''
    extractor = Extractor(url=url, text=text)
    extractor.find_entities()
    context = PlaceContext(extractor.places)
    # each setter is looked up and called in turn, in this fixed order
    setter_names = (
        'set_countries',
        'set_regions',
        'set_cities',
        'set_other',
    )
    for setter_name in setter_names:
        getattr(context, setter_name)()
    return context
def testGetRegionNames(self):
    '''
    Check the regions and the region names returned for Germany:
    16 entries each, and every region's iso code starts with "DE".
    '''
    context = PlaceContext(place_names=["Berlin"])
    germanRegions = context.getRegions("Germany")
    self.assertEqual(16, len(germanRegions))
    for germanRegion in germanRegions:
        if self.debug:
            print(germanRegion)
        isoCode = germanRegion.iso
        self.assertTrue(isoCode.startswith("DE"))
    names = context.get_region_names("Germany")
    self.assertEqual(16, len(names))
    if self.debug:
        print(names)
def get_place_context(url=None, text=None):
    '''
    Build a PlaceContext from the places, people and organs an
    Extractor finds.

    Args:
        url: handed to the Extractor as its url argument
        text: handed to the Extractor as its text argument

    Returns:
        the PlaceContext after set_countries, set_regions, set_cities
        and set_other have been called
    '''
    entities = Extractor(url=url, text=text)
    entities.find_entities()
    place_context = PlaceContext(
        entities.places,
        entities.people,
        entities.organs,
    )
    place_context.set_countries()
    place_context.set_regions()
    place_context.set_cities()
    place_context.set_other()
    return place_context


# Usage sketch carried over from the original source (not executed):
# url = 'http://www.bbc.com/news/world-us-canada-39821789'
# places = get_place_context(url=url)
# len(places)
def testIssue25(self):
    '''
    https://github.com/somnathrakshit/geograpy3/issues/25

    Create a PlaceContext for four country names and print its
    countries in debug mode; nothing is asserted.
    '''
    countryNames = ["Bulgaria", "Croatia", "Czech Republic", "Hungary"]
    context = PlaceContext(place_names=countryNames)
    if not self.debug:
        return
    print(context.countries)
def get_place_context(url=None, text=None, labels=Labels.default, debug=False):
    '''
    Analyze a text (or the text read from a url) and return its place
    context.

    The place context carries information about country, region, city
    and other, based on NLTK Named Entities in the label set
    Geographic(GPE), Person(PERSON) and Organization(ORGANIZATION).

    Args:
        url(String): the url to read text from (if any)
        text(String): the text to analyze
        labels: the labels handed to Extractor.find_entities
            (defaults to Labels.default)
        debug(boolean): if True show debug information

    Returns:
        PlaceContext: the place context built from the extracted places
    '''
    extractor = Extractor(url=url, text=text, debug=debug)
    extractor.find_entities(labels=labels)
    placeContext = PlaceContext(extractor.places)
    placeContext.setAll()
    return placeContext
def testPlaces(self):
    '''
    test places

    Sets up a PlaceContext for 'Ngong', 'Nairobi' and 'Kenya' and checks
    the countries and cities it holds, then checks the value at index 4
    of the first row returned by the city and region name lookups.
    '''
    pc = PlaceContext(['Ngong', 'Nairobi', 'Kenya'], setAll=False)
    pc.setAll()
    if self.debug:
        print(pc)
    self.assertEqual(1, len(pc.countries))
    self.assertEqual("Kenya", pc.countries[0])
    # unittest assertions are used throughout instead of bare asserts:
    # they are not stripped by "python -O" and report both values on
    # failure
    self.assertEqual(1, len(pc.cities))
    # NOTE: the checks on pc.other were already disabled in this test:
    # assert len(pc.other) == 1
    # assert 'Ngong' in pc.other
    # index 4 of the first lookup row is expected to be the country name
    self.assertEqual('Kenya', pc.cities_for_name('Nairobi')[0][4])
    self.assertEqual('United States', pc.regions_for_name('Ohio')[0][4])
    pc = PlaceContext(['Mumbai'])
    if self.debug:
        print(pc)
def testPlaces(self):
    '''
    Check the countries and cities of a PlaceContext and the country
    name that cities_for_name reports for a few names.
    '''
    context = PlaceContext(['Ngong', 'Nairobi', 'Kenya'], setAll=False)
    context.setAll()
    if self.debug:
        print(context)
    self.assertEqual(1, len(context.countries))
    self.assertEqual("Kenya", context.countries[0])
    self.assertEqual(2, len(context.cities))
    # (name to look up, expected country name of the first match)
    lookups = [
        ('Nairobi', 'Kenya'),
        ('Ohio', 'United States'),
        ('Amsterdam', 'Netherlands'),
    ]
    for name, expectedCountry in lookups:
        firstMatch = context.cities_for_name(name)[0]
        self.assertEqual(expectedCountry, firstMatch.country.name)
    context = PlaceContext(['Mumbai'])
    if self.debug:
        print(context)
def testPlaces(self):
    '''
    Check a PlaceContext whose countries, regions, cities and other
    setters are called one by one.
    '''
    def populated(place_names):
        '''create a PlaceContext and run its four setters in order'''
        context = PlaceContext(place_names)
        context.set_countries()
        context.set_regions()
        context.set_cities()
        context.set_other()
        return context

    pc = populated(['Ngong', 'Nairobi', 'Kenya'])
    if self.debug:
        print("countries=%s" % pc.countries)
        print("cities=%s" % pc.cities)
    countryCount = len(pc.countries)
    assert countryCount == 3
    cityCount = len(pc.cities)
    assert cityCount == 1
    # the checks on pc.other are disabled:
    # assert len(pc.other) == 1
    # assert 'Ngong' in pc.other
    nairobiRow = pc.cities_for_name('Nairobi')[0]
    assert nairobiRow[4] == 'Kenya'
    ohioRow = pc.regions_for_name('Ohio')[0]
    assert ohioRow[4] == 'United States'
    pc = populated(['Mumbai'])
    if self.debug:
        print(pc)
# Word lists keyed by address component name ('city', 'country', 'state',
# 'city_district', 'suburb').
# NOTE(review): presumably these are tokens that must not be accepted as a
# place name for the given component - confirm against the code using it.
stop_regions = {
    'city': [
        'sign', 'home', 'shop', 'us', 'about', 'unknown', 'industry', 'other',
        'welcome', 'contact', 'jobs', 'job', 'staff', 'chesapeake', 'find',
        'laboratory', 'services', 'email', 'register', 'company', 'locate',
        'zip', 'close', 'else', 'city', 'make', 'model', 'yes', 'login',
        'location', 'are', 'share', 'log', 'hours', 'remember', 'force',
        'cart', 'our', 'telephone', 'posts', 'those', 'sort', 'many', 'even'
    ],
    'country': ['at', 'by', 'to'],
    'state': ['follow', 'at', 'by', 'to', 'and'],
    'city_district': [],
    'suburb': []
}
# The same component names as the stop_regions keys, listed from suburb to
# country.
keys = ['suburb', 'city_district', 'city', 'state', 'country']
# Module-level PlaceContext, created at import time from an empty string.
pc = PlaceContext('')
# Starts out empty; how it is filled is not visible in this part of the file.
db_locations = {}
# Further component names; 'suburb' also appears in keys.
secondary_keys = [
    'house_number', 'road', 'unit', 'po_box', 'postcode', 'suburb'
]
# NOTE(review): presumably component names to be skipped - confirm against
# the code using this list.
ignored = [
    'house', 'near', 'category', 'level', 'island', 'country_region',
    'world_region'
]


def get_key(dict_keys, loc):
    '''
    Concatenate the values of loc for those entries of dict_keys that
    are contained in loc, in the order given by dict_keys.

    Args:
        dict_keys: iterable of keys to look for in loc
        loc: container supporting "in" and indexing by key; its values
            are appended to a str, so they have to be strings

    NOTE(review): no return statement is visible here, so as shown the
    function returns None - the definition may be truncated; confirm
    against the full file.
    '''
    loc_key = ''
    for _key in dict_keys:
        if _key in loc:
            loc_key += loc[_key]
def test():
    '''
    Check PlaceContext for a Kenyan and a Syrian list of place names
    after the countries, regions, cities and other setters were called.
    '''
    def build(place_names):
        '''create a PlaceContext and run its four setters in order'''
        context = PlaceContext(place_names)
        context.set_countries()
        context.set_regions()
        context.set_cities()
        context.set_other()
        return context

    kenya = build(['Ngong', 'Nairobi', 'Kenya'])
    assert len(kenya.countries) == 1
    assert len(kenya.cities) == 1
    assert len(kenya.other) == 1
    assert 'Ngong' in kenya.other
    nairobiRow = kenya.cities_for_name('Nairobi')[0]
    assert nairobiRow[4] == 'Kenya'
    ohioRow = kenya.regions_for_name('Ohio')[0]
    assert ohioRow[4] == 'United States'

    syria = build(['Aleppo', 'Syria'])
    assert 'Aleppo' in syria.cities