def testGetFields(self):
    '''
    tests field extraction from list of JSONAble objects and LoD
    '''
    sampleLod = [
        {"name": "Test", "label": 1},
        {"name": "Test 2", "label": 2},
        {"name": "Different", "location": "Munich"},
    ]
    expectedFields = ["name", "label", "location"]
    # field extraction from a plain list of dicts
    self.assertEqual(LOD.getFields(sampleLod), expectedFields)
    # field extraction from the same data as a list of JSONAble objects
    wrapper = JSONAbleList(clazz=JSONAble)
    wrapper.fromLoD(sampleLod)
    listOfJsonAbles = wrapper.getList()
    self.assertEqual(LOD.getFields(listOfJsonAbles), expectedFields)
def testDBPediaCities(self): ''' https://github.com/LITMUS-Benchmark-Suite/dbpedia-graph-convertor/blob/master/get_data.py ''' # kglf return dbpedia = self.getDBPedia() limit = 100 # Query to get the population of cities citiesWithPopulationQuery = """ PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX dbp: <http://dbpedia.org/property/> PREFIX dbr: <http://dbpedia.org/resource/> SELECT DISTINCT ?dbCity ?country ?name ?website ?population WHERE { ?dbCity a dbo:City . ?dbCity dbp:name ?name . ?dbCity dbo:country ?country . OPTIONAL { ?dbCity dbo:populationTotal ?population . } OPTIONAL { ?dbCity dbp:website ?website . } } LIMIT %d """ % limit cityList = dbpedia.queryAsListOfDicts(citiesWithPopulationQuery) cim = CityManager("dbpedia") LOD.setNone4List(cityList, ["population", "website"]) cim.store(cityList)
def test_round_trip(self):
    '''
    tests the csv round trip: dict -> csv -> dict

    Note: the initial dicts have missing values; the restored dicts are
    expected to contain the missing keys with None as value
    '''
    csvPath = "%s/%s.csv" % (self.testFolder, self.test_round_trip.__name__)
    originalLod = [
        {"pageTitle": "page_1", "name": "Test Page 1", "label": "1"},
        {"name": "Test Page 2", "label": "2"},
        {"pageTitle": "page_3", "label": "3"},
        {"pageTitle": "page_4", "name": "Test Page 4"},
    ]
    CSV.storeToCSVFile(originalLod, csvPath, withPostfix=True)
    restoredLod = CSV.restoreFromCSVFile(csvPath, withPostfix=True)
    # the expected LoD is the original one with all missing fields set to None
    expectedLod = originalLod.copy()
    allFields = LOD.getFields(expectedLod)
    LOD.setNone4List(expectedLod, allFields)
    self.assertEqual(expectedLod, restoredLod)
def toCSV(lod:list, includeFields:list=None, excludeFields:list=None, delimiter=",", quoting=csv.QUOTE_NONNUMERIC, **kwargs):
    '''
    converts the given lod to CSV string.

    For details about the csv dialect parameters see
    https://docs.python.org/3/library/csv.html#csv-fmt-params

    Args:
        lod(list): lod that should be converted to csv string
        includeFields(list): list of fields that should be included in the csv (positive list)
        excludeFields(list): list of fields that should be excluded from the csv (negative list)
        kwargs: csv dialect parameters

    Returns:
        csv string of the given lod; the empty string if lod is None or empty
    '''
    # nothing to convert - this also guards the lod[0] access below
    # (the previous None-only check crashed with an IndexError on an empty list)
    if not lod:
        return ''
    if isinstance(lod[0], JSONAble):
        # convert objects to their attribute dicts
        lod = [vars(d) for d in lod]
    if excludeFields is not None:
        lod = LOD.filterFields(lod, excludeFields)
    if includeFields is None:
        fields = LOD.getFields(lod)
    else:
        fields = includeFields
        # reverse=True keeps only the included fields
        lod = LOD.filterFields(lod, includeFields, reverse=True)
    csvStream = io.StringIO()
    dict_writer = csv.DictWriter(csvStream, fieldnames=fields, delimiter=delimiter, quoting=quoting, **kwargs)
    dict_writer.writeheader()
    dict_writer.writerows(lod)
    csvString = csvStream.getvalue()
    return csvString
def fromLoD(self, lod, append: bool = True, debug: bool = False):
    '''
    load my entityList from the given list of dicts

    Args:
        lod(list): the list of dicts to load
        append(bool): if True append to my existing entries
        debug(bool): if True print any conversion errors

    Return:
        list: a list of errors (if any)
    '''
    errorRecords = []
    targetList = self.getList()
    if not append:
        # start from scratch but keep the list object's identity
        del targetList[:]
    if self.handleInvalidListTypes:
        LOD.handleListTypes(lod=lod, doFilter=self.filterInvalidListTypes)
    for row in lod:
        try:
            # instantiate a fresh entity and fill it from the record
            newEntity = self.clazz()
            newEntity.fromDict(row)
            targetList.append(newEntity)
        except Exception as ex:
            errorRecord = {self.listName: row, "error": ex}
            errorRecords.append(errorRecord)
            if debug:
                print(errorRecord)
    return errorRecords
def store(self,listOfRecords,entityInfo,executeMany=False,fixNone=False):
    '''
    store the given list of records based on the given entityInfo

    Args:
       listOfRecords(list): the list of Dicts to be stored
       entityInfo(EntityInfo): the meta data to be used for storing
       executeMany(bool): if True the insert command is done with many/all records at once
       fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values

    Raises:
       Exception: with the insert command, the offending column name and debug
       context when a binding value is missing or cannot be bound; any other
       failure is re-raised wrapped with the same diagnostic information
    '''
    insertCmd=entityInfo.insertCmd
    # track the current record and its 1-based index for error diagnostics
    record=None
    index=0
    try:
        if executeMany:
            if fixNone:
                LOD.setNone4List(listOfRecords, entityInfo.typeMap.keys())
            self.c.executemany(insertCmd,listOfRecords)
        else:
            for record in listOfRecords:
                index+=1
                if fixNone:
                    LOD.setNone(record, entityInfo.typeMap.keys())
                self.c.execute(insertCmd,record)
        self.c.commit()
    except sqlite3.ProgrammingError as pe:
        msg=pe.args[0]
        if "You did not supply a value for binding" in msg:
            if ":" in msg:
                # sqlite now returns the parameter name not the number
                # You did not supply a value for binding parameter :type.
                columnName=re.findall(r':([a-zA-Z][a-zA-Z0-9_]*)',msg)[0]
                columnName=columnName.replace(":","")
            else:
                # pre python 3.10
                # You did not supply a value for binding 2.
                columnIndex=int(re.findall(r'\d+',msg)[0])
                # binding indices are 1-based - map back to the column name
                columnName=list(entityInfo.typeMap.keys())[columnIndex-1]
            debugInfo=self.getDebugInfo(record, index, executeMany)
            raise Exception("%s\nfailed: no value supplied for column '%s'%s" % (insertCmd,columnName,debugInfo))
        else:
            # unrelated programming error - propagate unchanged
            raise pe
    except sqlite3.InterfaceError as ie:
        msg=ie.args[0]
        if "Error binding parameter" in msg:
            # extract the named parameter, e.g. ":name", from the message
            columnName=re.findall(r':[_a-zA-Z]\w*',msg)[0]
            debugInfo=self.getDebugInfo(record, index, executeMany)
            raise Exception("%s\nfailed: error binding column '%s'%s" % (insertCmd,columnName,debugInfo))
        else:
            # unrelated interface error - propagate unchanged
            raise ie
    except Exception as ex:
        # wrap any other failure with the insert command and debug context
        debugInfo=self.getDebugInfo(record, index, executeMany)
        msg="%s\nfailed:%s%s" % (insertCmd,str(ex),debugInfo)
        raise Exception(msg)
def series():
    '''
    render the volumes overview page
    '''
    # map selected volume attributes to rendered Link objects;
    # NOTE(review): "self" is captured from the enclosing scope here
    linkFactories = {
        "homepage": lambda value: Link(url=value, title=value),
        "archive": lambda volume: Link(
            url=f"http://sunsite.informatik.rwth-aachen.de/ftp/pub/publications/CEUR-WS/Vol-{volume.get('number')}.zip",
            title=f"Vol-{volume.get('number')}.zip"),
        "urn": lambda value: Link(url=f"https://nbn-resolving.org/{value}", title=value),
    }
    volumeRows = [volume.__dict__.copy() for volume in self.ceurws.getList()]
    for row in volumeRows:
        for attr, makeLink in linkFactories.items():
            # pass the attribute value if present, otherwise the whole record
            row[attr] = makeLink(row[attr]) if attr in row else makeLink(row)
    tableHeaders = {fieldName: fieldName for fieldName in LOD.getFields(volumeRows)}
    volumeTable = LodTable(volumeRows, headers=tableHeaders, name="Volumes", isDatatable=True)
    return render_template('volumes.html', volumes=volumeTable)
def toJSON(self, limitToSampleFields: bool = False):
    '''
    create a JSON representation of this object

    Args:
        limitToSampleFields(bool): If True the returned JSON is limited to the
            attributes/fields that are present in the samples. Otherwise all
            attributes of the object will be included. Default is False.

    Returns:
        a recursive JSON dump of the dicts of my objects
    '''
    if limitToSampleFields:
        allowedFields = LOD.getFields(self.getJsonTypeSamples())
        if isinstance(self, JSONAbleList):
            # reduce every record of my list to the allowed sample fields
            reducedRecords = [
                {key: value for key, value in record.__dict__.items() if key in allowedFields}
                for record in self.__dict__[self.listName]
            ]
            payload = {self.listName: reducedRecords}
        else:
            # reduce my own attributes to the allowed sample fields
            payload = {key: value for key, value in self.__dict__.items() if key in allowedFields}
    else:
        # dump all attributes
        payload = self
    return json.dumps(payload,
                      default=lambda v: self.toJsonAbleValue(v),
                      sort_keys=True,
                      indent=JSONAbleSettings.indent)
def getLookup(self, attrName: str, withDuplicates: bool = False):
    '''
    create a lookup dictionary by the given attribute name

    Args:
        attrName(str): the attribute to lookup
        withDuplicates(bool): whether to retain single values or lists

    Return:
        a dictionary for lookup or a tuple dictionary,list of duplicates
        depending on withDuplicates
    '''
    # delegate to the generic LOD implementation on my entity list
    entityRecords = self.getList()
    return LOD.getLookup(entityRecords, attrName, withDuplicates)
def asListOfDicts(self, records, fixNone: bool = False, sampleCount: int = None):
    '''
    convert SPARQL result back to python native

    Args:
        records(list): the list of bindings
        fixNone(bool): if True add None values for empty columns in Dict
        sampleCount(int): the number of samples to check

    Returns:
        list: a list of Dicts
    '''
    xsd = "http://www.w3.org/2001/XMLSchema#"
    # fields are only needed to fill in missing columns
    fieldNames = LOD.getFields(records, sampleCount) if fixNone else None
    lod = []
    for binding in records:
        row = {}
        for name, cell in binding.items():
            datatype = cell.datatype
            if datatype == xsd + "integer":
                nativeValue = int(cell.value)
            elif datatype == xsd + "decimal":
                nativeValue = float(cell.value)
            elif datatype == xsd + "boolean":
                nativeValue = cell.value in ['TRUE', 'true']
            elif datatype == xsd + "date":
                nativeValue = datetime.datetime.strptime(cell.value, "%Y-%m-%d").date()
            elif datatype == xsd + "dateTime":
                nativeValue = SPARQL.strToDatetime(cell.value, debug=self.debug)
            else:
                # untyped literal or unsupported datatype - keep the raw value
                nativeValue = cell.value
            row[name] = nativeValue
        if fixNone:
            # make sure all fields are present in every row
            for fieldName in fieldNames:
                if fieldName not in row:
                    row[fieldName] = None
        lod.append(row)
    return lod
def testListIntersect(self):
    '''
    test a list intersection
    '''
    cities1 = [{"name": "London"}, {"name": "Athens"}]
    cities2 = [{"name": "Athens"}, {"name": "Paris"}]
    # only Athens is in both lists
    common = LOD.intersect(cities1, cities2, "name")
    self.assertEqual(1, len(common))
    self.assertEqual("Athens", common[0]["name"])
def testGetLookupIssue31And32(self):
    '''
    test for https://github.com/WolfgangFahl/pyLoDStorage/issues/31
    test for https://github.com/WolfgangFahl/pyLoDStorage/issues/32
    '''
    cityLod = [
        {"name": "Athens", "Q": 1524},
        {"name": "Paris", "Q": 90},
        {"name": ["München", "Munich"], "Q": 1726},
        {"name": "Athens", "Q": 1524},
    ]
    lookup, duplicates = LOD.getLookup(cityLod, "name")
    if self.debug:
        print(lookup)
    # the repeated Athens record is a duplicate; the list-valued name
    # yields one lookup entry per list element
    self.assertEqual(1, len(duplicates))
    self.assertEqual(4, len(lookup))
    # both list entries point at the very same record
    self.assertEqual(lookup["München"], lookup["Munich"])
def showEntity(self, entity: JSONAble):
    '''
    show the given entity

    Args:
        entity(JSONAble): the entity to render as a key/value datatable
    '''
    title = entity.__class__.__name__
    sampleFields = LOD.getFields(entity.getJsonTypeSamples())
    dictList = []
    for fieldName in sampleFields:
        # use "-" as placeholder for attributes the entity does not have
        fieldValue = getattr(entity, fieldName, "-")
        dictList.append({"key": fieldName, 'value': fieldValue})
    lodKeys = ["key", "value"]
    return render_template('datatable.html', title=title, menu=self.getMenuList(),
                           dictList=dictList, lodKeys=lodKeys, tableHeaders=lodKeys)
def checkHandleListTypeResult(self, lod, expectedLen, expected):
    '''
    check the result of the handleListType function

    Args:
        lod(list): the list of dicts to check
        expectedLen(int): the expected Length
        expected(str): the expected entry for the München,Munich Q1524 record with a list
    '''
    if self.debug:
        print(lod)
    self.assertEqual(expectedLen, len(lod))
    cityByQ, _duplicates = LOD.getLookup(lod, "Q")
    if self.debug:
        print(cityByQ)
    if expected is None:
        # the list-valued record must have been filtered out
        self.assertFalse(1726 in cityByQ)
    else:
        # the list value must have been converted to the expected string
        self.assertEqual(expected, cityByQ[1726]["name"])
def testIssue20And76(self):
    '''
    see https://github.com/WolfgangFahl/pyLoDStorage/issues/20
    add fixNone option to SPARQL results (same functionality as in SQL)

    https://github.com/WolfgangFahl/pyLoDStorage/issues/76
    SPARQL GET method support
    '''
    endpoint = "https://query.wikidata.org/sparql"
    queryString = """# Conference Series wikidata query
# see https://confident.dbis.rwth-aachen.de/dblpconf/wikidata
# WF 2021-01-30
SELECT ?confSeries ?short_name ?official_website
WHERE
{
  # scientific conference series (Q47258130)
  ?confSeries wdt:P31 wd:Q47258130.
  OPTIONAL {
    ?confSeries wdt:P1813 ?short_name .
  }
  # official website (P856)
  OPTIONAL {
    ?confSeries wdt:P856 ?official_website
  }
}
LIMIT 200
"""
    # exercise both supported HTTP methods
    for httpMethod in ["POST", "GET"]:
        wikidata = SPARQL(endpoint, method=httpMethod)
        lod = wikidata.queryAsListOfDicts(queryString, fixNone=True)
        fieldNames = LOD.getFields(lod)
        if self.debug:
            print(fieldNames)
        # with fixNone=True every row must contain every field
        for row in lod:
            for fieldName in fieldNames:
                self.assertTrue(fieldName in row)
def testListHandlingIssue33(self):
    '''
    test for handling list
    '''
    baseLod = [
        {"name": "Athens", "Q": 1524},
        {"name": "Paris", "Q": 90},
        {"name": ["München", "Munich"], "Q": 1726},
        {"name": "Athens", "Q": 1524},
    ]
    # self.debug=True
    # default: concatenate list values with ","
    lod = copy.deepcopy(baseLod)
    LOD.handleListTypes(lod)
    self.checkHandleListTypeResult(lod, 4, "München,Munich")
    # filter out records that contain list values
    lod = copy.deepcopy(baseLod)
    LOD.handleListTypes(lod, doFilter=True)
    self.checkHandleListTypeResult(lod, 3, None)
    # concatenate list values with a custom separator
    lod = copy.deepcopy(baseLod)
    LOD.handleListTypes(lod, separator=";")
    self.checkHandleListTypeResult(lod, 4, "München;Munich")