def testPopulation(self):
    '''
    test adding population data from wikidata to GeoLite2 information

    Resets the Locator singleton, populates its database, calls
    getWikidataCityPopulation with the configured endpoint and finally
    renders the list of database tables as PlantUML. The PlantUML markup
    is only printed when self.debug is set.
    '''
    Locator.resetInstance()
    loc = Locator.getInstance()
    loc.populate_db()
    user = getpass.getuser()
    if self.debug:
        print("current user is %s" % user)
    # endpoint is handed to getWikidataCityPopulation as None unless one
    # of the alternatives below is enabled
    # NOTE(review): presumably None means "do not query a SPARQL endpoint" - confirm
    endpoint = None
    # to refresh using wikidata (please note that
    # https://github.com/RDFLib/sparqlwrapper/issues/163 hits as of 2020-09):
    #   endpoint='https://query.wikidata.org/sparql'
    # to use your own wikidata copy as an endpoint:
    #   if user=="wf":
    #       2020 Apache Jena based wikidata copy
    #       endpoint="http://jena.zeus.bitplan.com/wikidata"
    #       2018 Blazegraph based wikidata copy
    #       endpoint="http://blazegraph.bitplan.com/sparql"
    loc.getWikidataCityPopulation(loc.sqlDB, endpoint)
    # document the resulting database schema as a PlantUML diagram
    diagramTitle = """geograpy Tables 2020-09-26 [[https://github.com/somnathrakshit/geograpy3 © 2020 geograpy3 project]]"""
    tables = loc.sqlDB.getTableList()
    plantUml = UML().tableListToPlantUml(
        tables, title=diagramTitle, packageName="geograpy3"
    )
    if self.debug:
        print(plantUml)
def testDelimiters(self):
    '''
    test the delimiter statistics for names

    Creates the view allNames as the union of the name columns of the
    countries, regions and cities tables and counts how often each
    character with a code point below "A" occurs in those names. The ten
    most frequent characters are printed when self.debug is set.
    '''
    loc = Locator.getInstance()
    loc.populate_db()
    # (re)create the helper view so that the test can be run repeatedly
    ddls = [
        "DROP VIEW IF EXISTS allNames",
        """CREATE VIEW allNames as select name from countries union select name from regions union select name from cities""",
    ]
    for ddl in ddls:
        loc.sqlDB.execute(ddl)
    nameRecords = loc.sqlDB.query("SELECT name from allNames")
    # output follows the debug flag; it used to be forced on by a
    # leftover "show = True" override
    show = self.debug
    if show:
        print("found %d name records" % len(nameRecords))
    # only characters sorting before "A" are counted
    firstLetterCode = ord("A")
    ordC = Counter(
        code
        for nameRecord in nameRecords
        for code in map(ord, nameRecord["name"])
        if code < firstLetterCode
    )
    if show:
        for index, (code, count) in enumerate(ordC.most_common(10)):
            print("%d: %d %s -> %d" % (index, code, chr(code), count))
def testIsoRegexp(self):
    '''
    test regular expression for iso codes

    Checks that Locator.isISO rejects a plain name and accepts every
    non-empty iso code found in the cities, countries and regions tables.
    '''
    loc = Locator.getInstance()
    self.assertFalse(loc.isISO('Singapore'))
    # UNION already removes duplicate rows, so the subdivision_1_iso_code
    # branch is listed only once.
    # NOTE(review): the branch used to appear twice - possibly a second
    # subdivision column was intended; confirm against the cities schema
    query = """
    select distinct country_iso_code as isocode from cities
    union
    select distinct subdivision_1_iso_code as isocode from cities
    union
    select distinct countryIsoCode as isocode from countries
    union
    select distinct regionIsoCode as isocode from regions
    """
    loc.populate_db()
    isocodeRecords = loc.sqlDB.query(query)
    for isocodeRecord in isocodeRecords:
        isocode = isocodeRecord['isocode']
        if not isocode:
            # empty / NULL codes are not checked
            continue
        isIso = loc.isISO(isocode)
        if not isIso and self.debug:
            print(isocode)
        # the message names the offending code even when debug is off
        self.assertTrue(isIso, f"{isocode!r} is not accepted by isISO")
def testHasViews(self):
    '''
    test that the views are available

    Looks up the tables of type "view" in the locator database and checks
    that the three lookup views are among them.
    '''
    loc = Locator.getInstance()
    viewsMap = loc.sqlDB.getTableDict(tableType="view")
    for view in ("CityLookup", "RegionLookup", "CountryLookup"):
        # assertIn reports which view is missing on failure
        self.assertIn(view, viewsMap)
def setUp(self, debug=False):
    '''
    setUp test environment

    Args:
        debug(bool): stored as self.debug; the tests use it to decide
            whether to print diagnostic output
    '''
    TestCase.setUp(self)
    self.debug = debug
    # actively test Wikidata tests?
    self.testWikidata = False
    # a profiler labelled with the name of the running test method
    self.profile = Profiler(f"test {self._testMethodName}, debug={self.debug}")
    # start every test from a fresh Locator singleton and call its
    # downloadDB() before the test body runs
    Locator.resetInstance()
    Locator.getInstance().downloadDB()
def locateCity(location, correctMisspelling=False, debug=False):
    '''
    locate the given location string

    Args:
        location(string): the description of the location
        correctMisspelling(bool): passed on to Locator.getInstance
        debug(bool): passed on to the Extractor and to Locator.getInstance

    Returns:
        the result of Locator.locateCity for the places of the Extractor
    '''
    # split the location text first, then hand the places to the Locator
    extractor = Extractor(text=location, debug=debug)
    extractor.split()
    locator = Locator.getInstance(
        correctMisspelling=correctMisspelling, debug=debug
    )
    return locator.locateCity(extractor.places)
def testLocatorWithWikiData(self):
    '''
    test Locator

    Populates the database of a fresh Locator instance and checks that
    the countries, regions and cities tables hold at least the expected
    number of records.
    '''
    Locator.resetInstance()
    loc = Locator.getInstance()
    # switch to True to call populate_db with force=True
    forceUpdate = False
    loc.populate_db(force=forceUpdate)
    tableList = loc.sqlDB.getTableList()
    expectedCities = 800000
    minimumRecords = (
        ("countries", 200),
        ("regions", 3000),
        ("cities", expectedCities),
    )
    for tableName, minimum in minimumRecords:
        recordCount = loc.db_recordCount(tableList, tableName)
        # assertGreaterEqual shows both numbers when the check fails
        self.assertGreaterEqual(recordCount, minimum, f"table {tableName}")
def testQueries(self):
    '''
    test preconfigured queries

    Runs every query of the query manager against the locator database
    and documents the result in mediawiki and github table format.
    '''
    queryManager = self.getQueryManager()
    self.assertIsNotNone(queryManager)
    locator = Locator.getInstance()
    # set show to True to always display the documented results
    show = self.debug
    outputFormats = ("mediawiki", "github")
    for query in queryManager.queriesByName.values():
        qlod = locator.sqlDB.query(query.query)
        for tablefmt in outputFormats:
            self.documentQueryResult(query, qlod, tablefmt, show=show)
def testWordCount(self):
    '''
    test the word count

    Builds a histogram of the number of words per city name and prints
    the 20 most common word counts when self.debug is set.
    '''
    loc = Locator.getInstance()
    nameRecords = loc.sqlDB.query("SELECT name from CITIES")
    if self.debug:
        print("testWordCount: found %d names" % len(nameRecords))
    # findall returns only the actual words; splitting on \W+ yields empty
    # strings for names that start or end with a non-word character
    # (e.g. a trailing "."), which inflated the count
    wordPattern = re.compile(r"\w+")
    wc = Counter(
        len(wordPattern.findall(nameRecord['name']))
        for nameRecord in nameRecords
    )
    if self.debug:
        print("most common 20: %s" % wc.most_common(20))
def testQuery(self):
    '''
    test a list of ad hoc queries

    Runs a few fixed example queries plus two counting queries per
    table (distinct wikidataids and total records) and documents each
    result in mediawiki and github table format with show=True.
    '''
    namedQueries = [
        ("LocationLabel Count", """select count(*),hierarchy from location_labels group by hierarchy"""),
        ("NY example", "select * from cityLookup where label='New York City'"),
        ("Berlin example", "select * from cityLookup where label='Berlin' order by pop desc,regionName"),
        ("Issue #25", "select * from countryLookup where label in ('France', 'Hungary', 'Poland', 'Spain', 'United Kingdom')"),
        ("Issue #25 Bulgaria", "select * from cityLookup where label in ('Bulgaria','Croatia','Hungary','Czech Republic') order by pop desc,regionName"),
    ]
    # per table: number of distinct wikidata ids, then total record count
    for tableName in ("countries", "regions", "cities"):
        namedQueries.extend([
            (f"unique wikidataids for {tableName}", f"select count(distinct(wikidataid)) as wikidataids from {tableName}"),
            (f"total #records for {tableName}", f"select count(*) as recordcount from {tableName}"),
        ])
    locator = Locator.getInstance()
    outputFormats = ("mediawiki", "github")
    for title, queryString in namedQueries:
        query = Query(name=title, query=queryString, lang="sql")
        qlod = locator.sqlDB.query(queryString)
        for tablefmt in outputFormats:
            self.documentQueryResult(query, qlod, tablefmt, show=True)
def testUML(self):
    '''
    test rendering the database tables as a PlantUML diagram

    Populates the database of a fresh Locator instance, converts its
    table list to PlantUML markup and prints the markup.
    '''
    Locator.resetInstance()
    locator = Locator.getInstance()
    locator.populate_db()
    currentUser = getpass.getuser()
    if self.debug:
        print("current user is %s" % currentUser)
    diagramTitle = """geograpy Tables 2021-08-13 [[https://github.com/somnathrakshit/geograpy3 © 2020-2021 geograpy3 project]]"""
    tables = locator.sqlDB.getTableList()
    plantUml = UML().tableListToPlantUml(
        tables, title=diagramTitle, packageName="geograpy3"
    )
    # the diagram is currently always shown, independent of self.debug
    showUml = True
    if showUml or self.debug:
        print(plantUml)
def lookupQuery(self, viewName, whereClause):
    '''
    query a lookup view for records that have a population

    Args:
        viewName(string): name of the view (or table) to select from
        whereClause(string): SQL condition, combined with "pop is not NULL"

    Returns:
        the records returned by sqlDB.query, ordered by pop descending

    Both arguments are interpolated verbatim into the SQL statement, so
    they must come from trusted test code only.
    '''
    sqlDB = Locator.getInstance().sqlDB
    return sqlDB.query(
        f"SELECT * FROM {viewName} where {whereClause} AND pop is not NULL ORDER by pop desc"
    )