Пример #1
0
 def testGetFields(self):
     '''
     check that LOD.getFields reports the expected field list both for a
     plain list of dicts and for the JSONAble objects created from that list
     '''
     records=[
         {"name":"Test","label":1},
         {"name": "Test 2","label": 2},
         {"name":"Different","location":"Munich"},
     ]
     expectedFields=["name","label","location"]
     # plain list of dicts
     self.assertEqual(LOD.getFields(records), expectedFields)
     # the same records loaded into JSONAble instances
     entityList=JSONAbleList(clazz=JSONAble)
     entityList.fromLoD(records)
     self.assertEqual(LOD.getFields(entityList.getList()), expectedFields)
Пример #2
0
 def testDBPediaCities(self):
     '''
     query DBpedia for cities and store the result with a CityManager

     the query is based on
     https://github.com/LITMUS-Benchmark-Suite/dbpedia-graph-convertor/blob/master/get_data.py

     NOTE: the unconditional return right below switches this test off -
     none of the statements after it are executed
     '''
     # test is switched off (original marker comment: kglf)
     return
     dbpediaEndpoint = self.getDBPedia()
     limit = 100
     # cities with their country, name and (optionally) population and website
     cityQuery = """
         PREFIX dbo: <http://dbpedia.org/ontology/>
         PREFIX dbp: <http://dbpedia.org/property/>
         PREFIX dbr: <http://dbpedia.org/resource/>
         SELECT DISTINCT ?dbCity ?country ?name ?website ?population
         WHERE {
             ?dbCity a dbo:City .
             ?dbCity dbp:name ?name .
             ?dbCity dbo:country ?country .
             OPTIONAL { ?dbCity dbo:populationTotal ?population . }
             OPTIONAL { ?dbCity dbp:website ?website . }
         }
         LIMIT %d
         """ % limit
     cityRecords = dbpediaEndpoint.queryAsListOfDicts(cityQuery)
     cityManager = CityManager("dbpedia")
     # population and website are OPTIONAL in the query
     LOD.setNone4List(cityRecords, ["population", "website"])
     cityManager.store(cityRecords)
Пример #3
0
 def test_round_trip(self):
     '''
     tests the csv round trip: dict -> csv -> dict

     Note: the initial dicts have missing values - the restored dicts are
     expected to contain the missing keys with None as value
     '''
     csvLOD = [
         {"pageTitle": "page_1", "name": "Test Page 1", "label": "1"},
         {"name": "Test Page 2", "label": "2"},
         {"pageTitle": "page_3", "label": "3"},
         {"pageTitle": "page_4", "name": "Test Page 4"},
     ]
     # the csv file is named after this test method
     fileName = f"{self.testFolder}/{self.test_round_trip.__name__}.csv"
     CSV.storeToCSVFile(csvLOD, fileName, withPostfix=True)
     restoredLOD = CSV.restoreFromCSVFile(fileName, withPostfix=True)
     # build the expected LOD from the input records
     expectedLOD = list(csvLOD)
     LOD.setNone4List(expectedLOD, LOD.getFields(expectedLOD))
     self.assertEqual(expectedLOD, restoredLOD)
Пример #4
0
    def toCSV(lod:list, includeFields:list=None, excludeFields:list=None, delimiter=",",quoting=csv.QUOTE_NONNUMERIC, **kwargs):
        '''
        converts the given lod to CSV string.
        For details about the csv dialect parameters see https://docs.python.org/3/library/csv.html#csv-fmt-params

        Args:
            lod(list): lod that should be converted to csv string - a list of JSONAble objects is converted to dicts first
            includeFields(list): list of fields that should be included in the csv (positive list)
            excludeFields(list): list of fields that should be excluded from the csv (negative list)
            delimiter(str): the field delimiter handed to csv.DictWriter
            quoting: the csv quoting mode handed to csv.DictWriter
            kwargs: csv dialect parameters
        Returns:
            csv string of the given lod - the empty string if the lod is None or empty
        '''
        # None and the empty list both mean "nothing to convert";
        # checking for emptiness here also protects the lod[0] access below
        if not lod:
            return ''
        # only the first entry is inspected to decide whether the list holds JSONAble objects
        if isinstance(lod[0], JSONAble):
            lod=[vars(d) for d in lod]
        if excludeFields is not None:
            lod=LOD.filterFields(lod, excludeFields)
        if includeFields is None:
            fields = LOD.getFields(lod)
        else:
            fields=includeFields
            lod=LOD.filterFields(lod, includeFields, reverse=True)
        csvStream = io.StringIO()
        dict_writer = csv.DictWriter(csvStream, fieldnames=fields, delimiter=delimiter, quoting=quoting, **kwargs)
        dict_writer.writeheader()
        dict_writer.writerows(lod)
        csvString = csvStream.getvalue()
        return csvString
Пример #5
0
    def fromLoD(self, lod, append: bool = True, debug: bool = False):
        '''
        fill my entity list with instances created from the given list of dicts

        Args:
            lod(list): the list of dicts to load
            append(bool): if True keep my existing entries and append to them, otherwise empty my list first
            debug(bool): if True print each error as it occurs

        Return:
            list: one dict per failed record holding the record (under my listName) and the exception (under "error")
        '''
        problems = []
        target = self.getList()
        if not append:
            # empty the list in place so that the list object itself stays the same
            target.clear()
        if self.handleInvalidListTypes:
            LOD.handleListTypes(lod=lod, doFilter=self.filterInvalidListTypes)

        for row in lod:
            try:
                # a fresh instance per record, created via the no-argument constructor
                instance = self.clazz()
                instance.fromDict(row)
                target.append(instance)
            except Exception as ex:
                # a failing record is reported instead of aborting the whole load
                problem = {self.listName: row, "error": ex}
                problems.append(problem)
                if debug:
                    print(problem)
        return problems
Пример #6
0
 def store(self,listOfRecords,entityInfo,executeMany=False,fixNone=False):
     '''
     store the given list of records based on the given entityInfo
     
     Args:
        listOfRecords(list): the list of Dicts to be stored
        entityInfo(EntityInfo): the meta data to be used for storing
        executeMany(bool): if True the insert command is done with many/all records at once
        fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values

     Raises:
        Exception: if storing fails - the message contains the insert command, the problem and the
            debug info supplied by getDebugInfo; the original error is attached as the cause
        sqlite3.ProgrammingError: re-raised unchanged if it is not a missing binding value problem
        sqlite3.InterfaceError: re-raised unchanged if it is not a parameter binding problem
     '''
     insertCmd=entityInfo.insertCmd
     # record and index are kept up to date so that the error handlers can report the failing record
     record=None
     index=0
     try:
         if executeMany:
             if fixNone:
                 LOD.setNone4List(listOfRecords, entityInfo.typeMap.keys())
             self.c.executemany(insertCmd,listOfRecords)
         else:
             for index,record in enumerate(listOfRecords,start=1):
                 if fixNone:
                     LOD.setNone(record, entityInfo.typeMap.keys())
                 self.c.execute(insertCmd,record)
         self.c.commit()
     except sqlite3.ProgrammingError as pe:
         msg=str(pe)
         if "You did not supply a value for binding" not in msg:
             raise
         # "?" is reported if the column can not be derived from the message -
         # the error handling must never fail itself and hide the sqlite error
         columnName="?"
         # sqlite now returns the parameter name not the number
         # You did not supply a value for binding parameter :type.
         namedMatch=re.search(r':([a-zA-Z][a-zA-Z0-9_]*)',msg)
         if namedMatch:
             columnName=namedMatch.group(1)
         else:
             # pre python 3.10
             # You did not supply a value for binding 2.
             indexMatch=re.search(r'\d+',msg)
             if indexMatch:
                 columnNames=list(entityInfo.typeMap.keys())
                 # the reported index is 1-based
                 columnIndex=int(indexMatch.group(0))
                 if 1<=columnIndex<=len(columnNames):
                     columnName=columnNames[columnIndex-1]
         debugInfo=self.getDebugInfo(record, index, executeMany)
         raise Exception("%s\nfailed: no value supplied for column '%s'%s" % (insertCmd,columnName,debugInfo)) from pe
     except sqlite3.InterfaceError as ie:
         msg=str(ie)
         if "Error binding parameter" not in msg:
             raise
         # the reported name keeps its leading colon, e.g. ':name'
         nameMatch=re.search(r':[_a-zA-Z]\w*',msg)
         columnName=nameMatch.group(0) if nameMatch else "?"
         debugInfo=self.getDebugInfo(record, index, executeMany)
         raise Exception("%s\nfailed: error binding column '%s'%s" % (insertCmd,columnName,debugInfo)) from ie
     except Exception as ex:
         debugInfo=self.getDebugInfo(record, index, executeMany)
         msg="%s\nfailed:%s%s" % (insertCmd,str(ex),debugInfo)
         raise Exception(msg) from ex
Пример #7
0
 def series():
     '''
     render the 'volumes.html' template with a table of all volumes

     The homepage, archive and urn entries of each volume record are replaced
     by Link objects before the table is built.

     NOTE(review): self is used but is not a parameter - presumably this
     function is nested in a method that provides it; verify against the
     enclosing scope.
     '''
     def homepageLink(value):
         return Link(url=value, title=value)

     def archiveLink(volume):
         return Link(
             url=
             f"http://sunsite.informatik.rwth-aachen.de/ftp/pub/publications/CEUR-WS/Vol-{volume.get('number')}.zip",
             title=f"Vol-{volume.get('number')}.zip")

     def urnLink(value):
         return Link(url=f"https://nbn-resolving.org/{value}", title=value)

     converters = {
         "homepage": homepageLink,
         "archive": archiveLink,
         "urn": urnLink,
     }
     # work on copies so that the attributes of the original objects stay untouched
     records = [vars(entry).copy() for entry in self.ceurws.getList()]
     for record in records:
         for key, convert in converters.items():
             # an existing entry is converted from its value,
             # a missing entry is derived from the complete record
             source = record[key] if key in record else record
             record[key] = convert(source)
     # every field is its own column header
     headers = {field: field for field in LOD.getFields(records)}
     volumes = LodTable(records,
                        headers=headers,
                        name="Volumes",
                        isDatatable=True)
     return render_template('volumes.html', volumes=volumes)
Пример #8
0
    def toJSON(self, limitToSampleFields: bool = False):
        '''
        convert me to a JSON string

        Args:
            limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.

        Returns:
            a recursive JSON dump of the dicts of my objects
        '''
        if not limitToSampleFields:
            # dump myself completely - toJsonAbleValue takes care of the conversion
            payload = self
        else:
            allowedFields = LOD.getFields(self.getJsonTypeSamples())
            if isinstance(self, JSONAbleList):
                # a list: limit the attributes of each of my list entries
                payload = {
                    self.listName: [
                        {
                            key: value
                            for key, value in vars(entry).items()
                            if key in allowedFields
                        }
                        for entry in self.__dict__[self.listName]
                    ]
                }
            else:
                # a single object: limit my own attributes
                payload = {
                    key: value
                    for key, value in self.__dict__.items()
                    if key in allowedFields
                }
        return json.dumps(payload,
                          default=lambda v: self.toJsonAbleValue(v),
                          sort_keys=True,
                          indent=JSONAbleSettings.indent)
Пример #9
0
 def getLookup(self, attrName: str, withDuplicates: bool = False):
     '''
     build a lookup dictionary for my list by the given attribute name

     delegates to LOD.getLookup with the result of getList()

     Args:
         attrName(str): the attribute to lookup
         withDuplicates(bool): whether to retain single values or lists

     Return:
         a dictionary for lookup or a tuple dictionary,list of duplicates depending on withDuplicates
     '''
     entries = self.getList()
     return LOD.getLookup(entries, attrName, withDuplicates)
Пример #10
0
 def asListOfDicts(self,
                   records,
                   fixNone: bool = False,
                   sampleCount: int = None):
     '''
     convert SPARQL result back to python native
     
     The conversion depends on the datatype of each binding value:
     xsd:integer -> int, xsd:decimal -> float, xsd:boolean -> bool,
     xsd:date -> datetime.date, xsd:dateTime -> result of SPARQL.strToDatetime.
     Values without datatype or with any other datatype are taken over unchanged.
     
     Args:
         records(list): the list of bindings
         fixNone(bool): if True add None values for empty columns in Dict
         sampleCount(int): the number of samples to check
         
     Returns:
         list: a list of Dicts
     '''
     resultList = []
     fields = None
     if fixNone:
         fields = LOD.getFields(records, sampleCount)
     for record in records:
         resultDict = {}
         for key, value in record.items():
             datatype = value.datatype
             rawValue = value.value
             if datatype == "http://www.w3.org/2001/XMLSchema#integer":
                 resultValue = int(rawValue)
             elif datatype == "http://www.w3.org/2001/XMLSchema#decimal":
                 resultValue = float(rawValue)
             elif datatype == "http://www.w3.org/2001/XMLSchema#boolean":
                 # xsd:boolean allows "true" and "1" as lexical forms of true;
                 # "TRUE" is still accepted for backward compatibility
                 resultValue = rawValue in ('true', '1', 'TRUE')
             elif datatype == "http://www.w3.org/2001/XMLSchema#date":
                 resultValue = datetime.datetime.strptime(
                     rawValue, "%Y-%m-%d").date()
             elif datatype == "http://www.w3.org/2001/XMLSchema#dateTime":
                 resultValue = SPARQL.strToDatetime(rawValue,
                                                    debug=self.debug)
             else:
                 # no datatype or unsupported datatype
                 resultValue = rawValue
             resultDict[key] = resultValue
         if fixNone:
             # make sure every field is available in every row
             for field in fields:
                 resultDict.setdefault(field, None)
         resultList.append(resultDict)
     return resultList
Пример #11
0
 def testListIntersect(self):
     '''
     test intersecting two lists of dicts by their "name" entries
     '''
     britishAndGreek = [{ "name":"London" }, { "name":"Athens" }]
     greekAndFrench = [{ "name":"Athens" }, { "name":"Paris" }]
     common = LOD.intersect(britishAndGreek, greekAndFrench, "name")
     # Athens is the only name available in both lists
     self.assertEqual(1, len(common))
     self.assertEqual("Athens", common[0]["name"])
Пример #12
0
 def testGetLookupIssue31And32(self):
     '''
     test LOD.getLookup with a duplicate record and with a list as lookup value

     see https://github.com/WolfgangFahl/pyLoDStorage/issues/31
     see https://github.com/WolfgangFahl/pyLoDStorage/issues/32
     '''
     athens = { "name": "Athens", "Q": 1524}
     cities = [
         athens,
         { "name": "Paris", "Q": 90},
         # a record with two names
         { "name": ["München", "Munich"], "Q": 1726},
         # the same content as the first record
         dict(athens),
     ]
     cityMap,duplicates = LOD.getLookup(cities, "name")
     if self.debug:
         print(cityMap)
     self.assertEqual(1,len(duplicates))
     # Athens, Paris, München and Munich
     self.assertEqual(4,len(cityMap))
     # both names are expected to lead to the same record
     self.assertEqual(cityMap["München"],cityMap["Munich"])
Пример #13
0
 def showEntity(self,entity:JSONAble):
     '''
     show the given entity as a key/value table

     Args:
         entity(JSONAble): the entity to show

     Returns:
         the rendered 'datatable.html' template with one row per field of the entity's samples
     '''
     columns=["key","value"]
     fieldNames=LOD.getFields(entity.getJsonTypeSamples())
     # fields the entity has no attribute for are shown as "-"
     rows=[
         {"key": fieldName, 'value': getattr(entity,fieldName,"-")}
         for fieldName in fieldNames
     ]
     return render_template('datatable.html',
                            title=entity.__class__.__name__,
                            menu=self.getMenuList(),
                            dictList=rows,
                            lodKeys=columns,
                            tableHeaders=columns)
Пример #14
0
 def checkHandleListTypeResult(self,lod,expectedLen,expected):
     '''
     check the result of the handleListType function
     
     Args:
         lod(list): the list of dicts to check
         expectedLen(int): the expected Length
         expected(str): the expected name entry of the record with Q 1726 (the München,Munich record with a list) - None if that record is expected to be missing
     '''
     munichQ=1726
     if self.debug:
         print(lod)
     self.assertEqual(expectedLen,len(lod))
     cityByQ,_duplicates=LOD.getLookup(lod, "Q")
     if self.debug:
         print(cityByQ)
     if expected is None:
         # the record with the list value must be gone
         self.assertFalse(munichQ in cityByQ)
     else:
         self.assertEqual(expected,cityByQ[munichQ]["name"])
0
    def testIssue20And76(self):
        '''
        see https://github.com/WolfgangFahl/pyLoDStorage/issues/20
        add fixNone option to SPARQL results (same functionality as in SQL)
        
         https://github.com/WolfgangFahl/pyLoDStorage/issues/76
        SPARQL GET method support
        '''
        endpoint = "https://query.wikidata.org/sparql"
        for method in ["POST", "GET"]:
            wd = SPARQL(endpoint, method=method)
            queryString = """
        # Conference Series wikidata query
# see https://confident.dbis.rwth-aachen.de/dblpconf/wikidata
# WF 2021-01-30
SELECT ?confSeries ?short_name ?official_website
WHERE 
{
  #  scientific conference series (Q47258130) 
  ?confSeries wdt:P31 wd:Q47258130.
  OPTIONAL { 
    ?confSeries wdt:P1813 ?short_name . 
  }
  #  official website (P856) 
  OPTIONAL {
    ?confSeries wdt:P856 ?official_website
  } 
}
LIMIT 200
"""
            lod = wd.queryAsListOfDicts(queryString, fixNone=True)
            fields = LOD.getFields(lod)
            if self.debug:
                print(fields)
            for row in lod:
                for field in fields:
                    self.assertTrue(field in row)
Пример #16
0
 def testListHandlingIssue33(self):
     '''
     test LOD.handleListTypes with its default options, with doFilter and
     with a different separator
     '''
     exampleLod = [
             { "name": "Athens",              "Q": 1524},
             { "name": "Paris",               "Q": 90},
             { "name": ["München", "Munich"], "Q": 1726},
             { "name": "Athens",              "Q": 1524},
         ]
     # options for handleListTypes, expected number of records,
     # expected name of the München/Munich record (None: record is gone)
     scenarios = [
         ({}, 4, "München,Munich"),
         ({"doFilter": True}, 3, None),
         ({"separator": ";"}, 4, "München;Munich"),
     ]
     for options,expectedLen,expected in scenarios:
         # each scenario gets its own copy of the example records
         lod=copy.deepcopy(exampleLod)
         LOD.handleListTypes(lod,**options)
         self.checkHandleListTypeResult(lod, expectedLen, expected)