def testEntityManager(self):
    '''
    test the entity Manager handling

    Runs two passes over the royals sample data: pass 0 uses
    Sample.getRoyals() and stores it with storeLoD() without a class,
    pass 1 uses Sample.getRoyalsInstances() and stores it with store()
    using the Royal class. Each pass is repeated for the default, JSON
    and JsonPickle storage configurations. The stored data is read back
    and a fixed set of fields is compared with the original records.
    '''
    # NOTE(review): debug output is forced on for this test
    self.debug = True
    for i, royals in enumerate(
            [Sample.getRoyals(), Sample.getRoyalsInstances()]):
        if self.debug:
            print(f"{i+1}:{royals}")
        sparqlConfig = StorageConfig.getSPARQL(
            "http://example.bitplan.com",
            "http://localhost:3030/example",
            host="localhost")
        # TODO use sparql Config
        for config in [
                StorageConfig.getDefault(debug=self.debug),
                StorageConfig.getJSON(debug=self.debug),
                StorageConfig.getJsonPickle(self.debug)]:
            self.configure(config)
            name = "royal" if i == 0 else "royalorm"
            clazz = None if i == 0 else Royal
            em = EntityManager(name=name,
                               entityName="Royal",
                               entityPluralName="Royals",
                               clazz=clazz,
                               listName="royals",
                               config=config)
            em.royals = royals
            if i == 0:
                cacheFile = em.storeLoD(royals)
            else:
                cacheFile = em.store()
            # only check the file when the store call returned one
            if cacheFile is not None:
                self.assertTrue(os.path.isfile(cacheFile))
            royalsLod = em.fromStore()
            self.assertTrue(isinstance(royalsLod, list))
            hint = f"{i}({config.mode}):{name}"
            for item in royalsLod:
                self.assertTrue(isinstance(item, dict),
                                f"{hint}:expecting dict")
            royalsList = em.getList()
            self.assertEqual(len(royals), len(royalsList))
            for j, item in enumerate(royalsList):
                # build a per-item hint; rebinding "hint" here would
                # accumulate all previous indices (".../0/1/2/...")
                itemHint = f"{hint}/{j}"
                royal = royals[j]
                # TODO check type handling e.g. "born"
                self.checkItem(
                    royal, item,
                    ["name", "age", "numberInLine", "wikidataurl"],
                    itemHint)
def testSqllite3Speed(self):
    '''
    test sqlite3 speed with 100000 artificial sample records

    The records come from Sample.getSample and are passed to
    checkListOfRecords together with 'Sample' and 'pKey'
    (presumably the entity name and the primary key).
    '''
    sampleCount = 100000
    self.checkListOfRecords(Sample.getSample(sampleCount), 'Sample', 'pKey')
def testListOfCities(self):
    '''
    test sqlite3 with the city records from Sample.getCities

    The check runs once with fixDates=True and once with
    fixDates=False; each time the retrieved list must have the same
    length as the input list.
    '''
    cities = Sample.getCities()
    for fixDates in (True, False):
        roundTripped = self.checkListOfRecords(cities, 'City', fixDates=fixDates)
        self.assertEqual(len(cities), len(roundTripped))
def testSqlite3(self):
    '''
    test sqlite3 with a few records from the royal family

    The list returned by checkListOfRecords must be equal to the
    records that were handed in.
    '''
    royals = Sample.getRoyals()
    stored = self.checkListOfRecords(royals, 'Person', 'name', debug=True)
    if self.debug:
        print(stored)
    self.assertEqual(royals, stored)
def testDob(self):
    '''
    test the DOB (date of birth) function of Sample

    Sample.dob("1926-04-21") must yield an object whose year, month
    and day attributes are 1926, 4 and 21.
    '''
    born = Sample.dob("1926-04-21")
    # same order of checks as before: year, month, day
    for attribute, expected in (("year", 1926), ("month", 4), ("day", 21)):
        self.assertEqual(expected, getattr(born, attribute))
def testCities(self):
    '''
    test a list of cities

    Prepares the first `limit` city records (1000, or 4000 when the
    current user is "travis") with a dgraph type and a geo location,
    adds them to dgraph in batches of 250 and queries them back.
    Timing information for adding and querying is printed.
    The dgraph handle is closed even when an assertion fails.
    '''
    cityList = Sample.getCities()
    self.assertEqual(128769, len(cityList))
    cityIter = iter(cityList)
    # limit=len(cityList)
    limit = 1000
    if getpass.getuser() == "travis":
        limit = 4000
    for _ in range(limit):
        city = next(cityIter)
        city['dgraph.type'] = 'City'
        lat = float(city['lat'])
        lng = float(city['lng'])
        # coordinates are given as [lng, lat]
        city['location'] = {'type': 'Point', 'coordinates': [lng, lat]}
        # print("%d: %s" % (i,city))
    dgraph = self.getDGraph()
    try:
        dgraph.drop_all()
        schema = ''' name: string @index(exact) . country: string . lat: float . lng: float . location: geo . type City { name lat lng location country }'''
        dgraph.addSchema(schema)
        startTime = time.time()
        dgraph.addData(obj=cityList, limit=limit, batchSize=250)
        query = '''{ # get cities cities(func: has(name)) { country name lat lng location } } '''
        elapsed = time.time() - startTime
        print("dgraph:adding %d records took %5.3f s => %5.f records/s" % (limit, elapsed, limit / elapsed))
        startTime = time.time()
        queryResult = dgraph.query(query)
        elapsed = time.time() - startTime
        print("dgraph:query of %d records took %5.3f s => %5.f records/s" % (limit, elapsed, limit / elapsed))
        self.assertTrue('cities' in queryResult)
        qCityList = queryResult['cities']
        self.assertEqual(limit, len(qCityList))
    finally:
        # release the connection even if a check above failed
        dgraph.close()
def testIssue25(self):
    '''
    see https://github.com/WolfgangFahl/pyLoDStorage/issues/25

    Builds a pandas DataFrame from the royals records and checks the
    row count, the column count and that the mean of the 'age' column
    is greater than 53.
    '''
    royals = Sample.getRoyals()
    frame = pd.DataFrame(royals)
    rowCount = len(frame)
    self.assertEqual(rowCount, len(royals))
    columnCount = len(frame.columns.values)
    self.assertEqual(columnCount, len(royals[0].keys()))
    meanAge = frame['age'].mean()
    self.assertIsNotNone(meanAge)
    self.assertGreater(meanAge, 53)
def getSampleTableDB(withDrop=False,debug=False,failIfTooFew=False,sampleSize=1000):
    '''
    create an SQLDB with a "sample" table filled from Sample.getSample

    Args:
        withDrop: passed on to SQLDB.createTable
        debug: assigned to the debug attribute of the SQLDB
        failIfTooFew: passed on to SQLDB.createTable
        sampleSize: number handed to Sample.getSample; ten times this
            value is passed as sampleRecordCount

    Returns:
        the SQLDB the records were stored in
    '''
    records = Sample.getSample(sampleSize)
    db = SQLDB()
    db.debug = debug
    tableInfo = db.createTable(
        records,
        "sample",
        primaryKey='pKey',
        withDrop=withDrop,
        sampleRecordCount=sampleSize * 10,
        failIfTooFew=failIfTooFew,
    )
    db.store(records, tableInfo, executeMany=True, fixNone=True)
    return db
def testListOfDictInsert(self):
    '''
    test inserting a list of dicts and retrieving the values again

    Uses a person based example: instead of
    https://en.wikipedia.org/wiki/FOAF_(ontology)
    an object oriented derivate of FOAF with a focus on datatypes is used.

    For typedLiterals True and False the existing persons are deleted,
    the royals are inserted and then queried back; the queried list
    must be equal to the inserted list.
    '''
    royals = Sample.getRoyals()
    # the SPARQL statements do not depend on the literal mode
    deleteString = """ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/> DELETE WHERE { ?person a 'foafo:Person'. ?person ?p ?o. } """
    queryString = """ PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/> SELECT ?name ?born ?numberInLine ?wikidataurl ?age ?ofAge ?lastmodified WHERE { ?person a 'foafo:Person'. ?person foafo:Person_name ?name. ?person foafo:Person_born ?born. ?person foafo:Person_numberInLine ?numberInLine. ?person foafo:Person_wikidataurl ?wikidataurl. ?person foafo:Person_age ?age. ?person foafo:Person_ofAge ?ofAge. ?person foafo:Person_lastmodified ?lastmodified. }"""
    for typedLiteralMode in (True, False):
        updater = self.getJena(mode='update',
                               typedLiterals=typedLiteralMode,
                               debug=self.debug)
        updater.query(deleteString)
        errors = updater.insertListOfDicts(
            royals,
            'foafo:Person',
            'name',
            'PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>')
        self.checkErrors(errors)
        reader = self.getJena(mode="query", debug=self.debug)
        personResults = reader.query(queryString)
        self.assertEqual(len(royals), len(personResults))
        personList = reader.asListOfDicts(personResults)
        if self.debug:
            for index, person in enumerate(personList):
                print("%d: %s" % (index, person))
        # check the correct round-trip behavior
        self.assertEqual(royals, personList)
def testRoyals(self): ''' test conversion of royals ''' return # TODO - fix me inlod = Sample.getRoyals() csv = CSV.toCSV(inlod) if self.debug: print(csv) # https://stackoverflow.com/questions/3717785/what-is-a-convenient-way-to-store-and-retrieve-boolean-values-in-a-csv-file outlod = CSV.fromCSV(csv) if self.debug: print(outlod)
def testBackup(self):
    '''
    test creating a backup of the SQL database

    Does nothing on Python versions below 3.7. Otherwise the city
    records are stored, the database is backed up to
    /tmp/testSqlite.db, the backup is restored into a RAM database
    and the number of restored records is compared with the input.
    '''
    if sys.version_info < (3, 7):
        return
    cities = Sample.getCities()
    self.checkListOfRecords(cities, 'City', fixDates=True, doClose=False)
    backupDB = "/tmp/testSqlite.db"
    if self.debug:
        showProgress = 200
    else:
        showProgress = 0
    self.sqlDB.backup(backupDB, profile=self.debug, showProgress=showProgress)
    backupSize = os.stat(backupDB).st_size
    if self.debug:
        print ("size of backup DB is %d" % backupSize)
    self.assertTrue(backupSize > 600000)
    self.sqlDB.close()
    # restore
    ramDB = SQLDB.restore(backupDB, SQLDB.RAM, profile=self.debug, showProgress=showProgress)
    cityInfo = EntityInfo(cities[:50], 'City', debug=self.debug)
    restoredCities = ramDB.queryAll(cityInfo)
    self.assertEqual(len(restoredCities), len(cities))
def testCopy(self):
    '''
    test copying databases into another database

    Three sample tables of 1000 records each are created one after
    another and copied into the file database
    /tmp/DAWT_Sample3x1000.db. Afterwards the file size, the table
    names and the writability of the copy are checked.
    '''
    dbFile = "/tmp/DAWT_Sample3x1000.db"
    copyDB = SQLDB(dbFile)
    tableNames = ['Sample_%d_1000' % sampleNo for sampleNo in range(3)]
    for tableName in tableNames:
        records = Sample.getSample(1000)
        self.checkListOfRecords(records, tableName, 'pKey', doClose=False)
        self.sqlDB.copyTo(copyDB)
    copySize = os.stat(dbFile).st_size
    if self.debug:
        print ("size of copy DB is %d" % copySize)
    self.assertTrue(copySize > 70000)
    tableList = copyDB.getTableList()
    if self.debug:
        print(tableList)
    for position, tableName in enumerate(tableNames):
        self.assertEqual(tableName, tableList[position]['name'])
    # check that database is writable
    # https://stackoverflow.com/a/44707371/1497139
    copyDB.execute("pragma user_version=0")
def testListOfDictSpeed(self):
    '''
    test the speed of adding data

    Inserts 5000 sample records twice: once with batchSize None and
    once with batchSize 1000. When self.profile is set the elapsed
    time and the record rate are printed.
    '''
    limit = 5000
    for batchSize in (None, 1000):
        sampleRecords = Sample.getSample(limit)
        jena = self.getJena(mode='update', profile=self.profile)
        started = time.time()
        errors = jena.insertListOfDicts(
            sampleRecords,
            "ex:TestRecord",
            'pkey',
            'PREFIX ex: <http://example.com/>',
            batchSize=batchSize)
        self.checkErrors(errors)
        elapsed = time.time() - started
        if self.profile:
            print("adding %d records took %5.3f s => %5.f records/s" % (limit, elapsed, limit / elapsed))
def testIssue15(self):
    '''
    https://github.com/WolfgangFahl/pyLoDStorage/issues/15

    auto create view ddl in mergeschema

    Creates a Person and a Family table and compares the result of
    Schema.getGeneralViewDDL for the view name "PersonBase" with the
    expected DDL.
    '''
    self.sqlDB = SQLDB(debug=self.debug, errorDebug=self.debug)
    royals = Sample.getRoyals()
    personInfo = EntityInfo(royals[:3], 'Person', 'name', debug=self.debug)
    self.sqlDB.createTable(royals[:10], personInfo.name, personInfo.primaryKey)
    families = [{'name': 'Royal family', 'country': 'UK', 'lastmodified': datetime.now()}]
    self.sqlDB.createTable(families[:10], 'Family', 'name')
    viewDDL = Schema.getGeneralViewDDL(self.sqlDB.getTableList(), "PersonBase")
    if self.debug:
        print (viewDDL)
    expected = """CREATE VIEW PersonBase AS SELECT name,lastmodified FROM Person UNION SELECT name,lastmodified FROM Family"""
    self.assertEqual(expected, viewDDL)
def testIssue24_IntegrateTabulate(self):
    '''
    https://github.com/WolfgangFahl/pyLoDStorage/issues/24

    test https://pypi.org/project/tabulate/ support

    Renders the royals and a per-country city counter in the table
    formats latex, grid, mediawiki and github. The tables are only
    printed when debug is active.
    '''
    show = self.debug
    # show=True
    tableFormats = ["latex", "grid", "mediawiki", "github"]
    royals = Royals(load=True)
    for tableFormat in tableFormats:
        table = tabulate(royals.royals, headers="keys", tablefmt=tableFormat)
        if show:
            print (table)
    # count the cities per country
    countryCounter = Counter(city["country"] for city in Sample.getCities())
    tabulateCounter = TabulateCounter(countryCounter)
    for tableFormat in tableFormats:
        table = tabulateCounter.mostCommonTable(tablefmt=tableFormat, limit=7)
        if show:
            print(table)
def testEntityInfo(self):
    '''
    test creating entityInfo from the sample record

    Checks the CREATE TABLE and INSERT commands derived from the
    first three royals, the table list after creating the Person
    table and the PlantUML output, both for the Person table alone
    and for Person plus Family generalized to "PersonBase".
    '''
    royals = Sample.getRoyals()
    personInfo = EntityInfo(royals[:3], 'Person', 'name', debug=True)
    expectedCreateCmd = "CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN,lastmodified TIMESTAMP)"
    self.assertEqual(expectedCreateCmd, personInfo.createTableCmd)
    expectedInsertCmd = "INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge,lastmodified) values (:name,:born,:numberInLine,:wikidataurl,:age,:ofAge,:lastmodified)"
    self.assertEqual(expectedInsertCmd, personInfo.insertCmd)
    self.sqlDB = SQLDB(debug=self.debug, errorDebug=True)
    self.sqlDB.createTable(royals[:10], personInfo.name, personInfo.primaryKey)
    tableList = self.sqlDB.getTableList()
    if self.debug:
        print (tableList)
    self.assertEqual(1, len(tableList))
    personTable = tableList[0]
    self.assertEqual("Person", personTable['name'])
    self.assertEqual(7, len(personTable['columns']))
    plantUml = UML().tableListToPlantUml(tableList, packageName="Royals", withSkin=False)
    if self.debug:
        print(plantUml)
    expected = """package Royals { class Person << Entity >> { age : FLOAT born : DATE lastmodified : TIMESTAMP name : TEXT <<PK>> numberInLine : INTEGER ofAge : BOOLEAN wikidataurl : TEXT } } """
    self.assertEqual(expected, plantUml)
    # testGeneralization
    families = [{'name': 'Royal family', 'country': 'UK', 'lastmodified': datetime.now()}]
    self.sqlDB.createTable(families[:10], 'Family', 'name')
    tableList = self.sqlDB.getTableList()
    self.assertEqual(2, len(tableList))
    plantUml = UML().tableListToPlantUml(tableList, generalizeTo="PersonBase", withSkin=False)
    if self.debug:
        print(plantUml)
    expected = '''class PersonBase << Entity >> { lastmodified : TIMESTAMP name : TEXT <<PK>> } class Person << Entity >> { age : FLOAT born : DATE numberInLine : INTEGER ofAge : BOOLEAN wikidataurl : TEXT } class Family << Entity >> { country : TEXT } PersonBase <|-- Person PersonBase <|-- Family '''
    self.assertEqual(expected, plantUml)
def testCountries(self):
    '''
    test handling countries

    Drops all dgraph data, adds a Country schema, converts the
    country records from Sample.getCountries (key 'country_code'
    becomes 'code', 'latlng' becomes a geo 'location'), adds them and
    checks the query result and the schema query result.
    The dgraph handle is closed even when an assertion fails.
    '''
    countryList = Sample.getCountries()
    # print(countryList)
    dgraph = self.getDGraph()
    try:
        dgraph.drop_all()
        schema = ''' name: string @index(exact) . code: string @index(exact) . capital: string . location: geo . type Country { code name location capital }'''
        dgraph.addSchema(schema)
        for country in countryList:
            # rename dictionary keys
            # country['name']=country.pop('Name')
            country['code'] = country.pop('country_code')
            country['dgraph.type'] = 'Country'
            lat, lng = country.pop('latlng')
            # coordinates are given as [lng, lat]
            country['location'] = {'type': 'Point', 'coordinates': [lng, lat]}
            print(country)
        dgraph.addData(countryList)
        query = '''{ # list of countries countries(func: has(code)) { uid name code capital location } }'''
        queryResult = dgraph.query(query)
        self.assertIn("countries", queryResult)
        countries = queryResult["countries"]
        self.assertEqual(247, len(countries))
        schemaResult = dgraph.query("schema{}")
        print(schemaResult)
        self.assertIn("schema", schemaResult)
        schema = schemaResult["schema"]
        self.assertGreaterEqual(len(schema), 7)
        # see https://discuss.dgraph.io/t/running-upsert-in-python/9364
        # disabled upsert (kept for reference):
        # mutation='''
        # upsert {
        #   query {
        #     # get the uids of all Country nodes
        #     countries as var (func: has(<dgraph.type>)) @filter(eq(<dgraph.type>, "Country")) {
        #       uid
        #     }
        #   }
        #   mutation {
        #     delete {
        #       uid(countries) * * .
        #     }
        #   }
        # }'''
        # dgraph.mutate(mutation)
    finally:
        # the original referenced dgraph.close without calling it,
        # so the connection was never closed
        dgraph.close()