class RDFTestCase():
    """Manual RDF/XML round-trip check against an rdflib ``Graph``.

    This is a plain class rather than a ``unittest.TestCase``: nothing is
    asserted, the graphs are printed for inspection instead.  ``setUp`` and
    ``addDonna`` return the graph so the steps can be driven by hand.
    """

    backend = 'default'  # store name passed to Graph(store=...)
    path = 'store'       # configuration passed to Graph.open()

    def setUp(self):
        """Create and open the graph, bind the ``dc``/``foaf`` prefixes.

        :return: the opened graph (also kept as ``self.store``)
        """
        self.store = Graph(store=self.backend)
        self.store.open(self.path)
        # The Dublin Core namespace was previously bound with a doubled
        # scheme ("http://http://purl.org/..."), which is not the DC URI.
        self.store.bind("dc", "http://purl.org/dc/elements/1.1/")
        self.store.bind("foaf", "http://xmlns.com/foaf/0.1/")
        return self.store

    def tearDown(self):
        """Print the serialized graph, then close it."""
        # Serialize *before* closing: the graph is read here, and the store
        # should not be read once it has been closed.  The close still runs
        # if serialization fails.
        try:
            print(self.store.serialize())
        finally:
            self.store.close()

    def addDonna(self):
        """Add a FOAF person (type, nick, name) on a fresh blank node.

        The blank node is kept as ``self.donna`` and its N3 form is printed.

        :return: the graph the triples were added to
        """
        donna = BNode()
        self.donna = donna
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('Identificador: %s' % donna.n3())
        self.store.add((donna, RDF.type, FOAF["Person"]))
        self.store.add((donna, FOAF["nick"], Literal("donna")))
        self.store.add((donna, FOAF["name"], Literal("Donna Fales")))
        return self.store

    def testRDFXML(self):
        """Serialize the graph as pretty-xml, parse it back and print it.

        No isomorphism assertion is made here because this class is not a
        ``unittest.TestCase``; the re-parsed graph is only printed.
        """
        self.addDonna()
        g = Graph()
        g.parse(StringInputSource(self.store.serialize(format="pretty-xml")))
        print(g.serialize())
class ParserTestCase(unittest.TestCase):
    """RDF/XML parser test for a subject URI with no path and an empty fragment."""

    backend = 'default'  # store name passed to Graph(store=...)
    path = 'store'       # configuration passed to Graph.open()

    def setUp(self):
        """Create and open the graph under test."""
        self.graph = Graph(store=self.backend)
        self.graph.open(self.path)

    def tearDown(self):
        """Close the graph opened in ``setUp``."""
        self.graph.close()

    def testNoPathWithHash(self):
        """Parse a class description for ``http://example.org#``.

        The document is parsed with ``publicID="http://example.org"``; the
        test then expects the ``rdfs:label`` and ``rdf:type`` of the subject
        ``http://example.org#`` to be retrievable.
        """
        g = self.graph
        # The XML declaration must be the very first thing in the document,
        # hence the line continuation right after the opening quotes.
        g.parse(StringInputSource("""\
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
>

<rdfs:Class rdf:about="http://example.org#">
  <rdfs:label>testing</rdfs:label>
</rdfs:Class>

</rdf:RDF>
"""), publicID="http://example.org")

        subject = URIRef("http://example.org#")

        # assertEqual replaces the deprecated assertEquals alias.
        label = g.value(subject, RDFS.label)
        self.assertEqual(label, Literal("testing"))

        # Named rdf_type so the builtin ``type`` is not shadowed.
        rdf_type = g.value(subject, RDF.type)
        self.assertEqual(rdf_type, RDFS.Class)
class TypeCheckCase(unittest.TestCase):
    """Expects ``Graph.add`` to reject ``None`` in each position of a triple."""

    # TODO: until we decide if we want to add type checking back to rdflib
    unstable = True

    backend = "default"
    path = "store"

    def setUp(self):
        """Create and open the graph under test."""
        # NOTE(review): the graph is built with the ``backend=`` keyword;
        # confirm the installed rdflib still accepts it.
        self.store = graph = Graph(backend=self.backend)
        graph.open(self.path)

    def tearDown(self):
        """Close the graph opened in ``setUp``."""
        self.store.close()

    def testSubjectTypeCheck(self):
        """``None`` as subject must raise ``SubjectTypeError``."""
        bad_triple = (None, foo, foo)
        self.assertRaises(SubjectTypeError, self.store.add, bad_triple)

    def testPredicateTypeCheck(self):
        """``None`` as predicate must raise ``PredicateTypeError``."""
        bad_triple = (foo, None, foo)
        self.assertRaises(PredicateTypeError, self.store.add, bad_triple)

    def testObjectTypeCheck(self):
        """``None`` as object must raise ``ObjectTypeError``."""
        bad_triple = (foo, foo, None)
        self.assertRaises(ObjectTypeError, self.store.add, bad_triple)
class TypeCheckCase(unittest.TestCase):
    """Type checks on ``Graph.add``: a ``None`` term must be refused."""

    # TODO: until we decide if we want to add type checking back to rdflib
    unstable = True

    backend = 'default'
    path = 'store'

    def setUp(self):
        """Build the graph with the configured backend and open it."""
        # NOTE(review): ``backend=`` is the keyword used here; verify that
        # the rdflib version in use accepts it.
        self.store = Graph(backend=self.backend)
        self.store.open(self.path)

    def tearDown(self):
        """Close the graph again."""
        self.store.close()

    def _assertAddRaises(self, error, subject, predicate, obj):
        """Assert that adding (subject, predicate, obj) raises *error*."""
        self.assertRaises(error, self.store.add, (subject, predicate, obj))

    def testSubjectTypeCheck(self):
        """A ``None`` subject is expected to raise ``SubjectTypeError``."""
        self._assertAddRaises(SubjectTypeError, None, foo, foo)

    def testPredicateTypeCheck(self):
        """A ``None`` predicate is expected to raise ``PredicateTypeError``."""
        self._assertAddRaises(PredicateTypeError, foo, None, foo)

    def testObjectTypeCheck(self):
        """A ``None`` object is expected to raise ``ObjectTypeError``."""
        self._assertAddRaises(ObjectTypeError, foo, foo, None)
class PychinkoTestCase(unittest.TestCase):
    """Runs the Pychinko interpreter over the rules and facts of ``test/a.n3``."""

    backend = 'default'  # store name passed to Graph(store=...)

    def setUp(self):
        """Open a graph on a scratch directory and load ``test/a.n3``."""
        self.g = Graph(store=self.backend)
        # Remember the scratch directory so tearDown can remove it.
        self._tmp_dir = mkdtemp()
        self.g.open(configuration=self._tmp_dir)
        self.g.parse("test/a.n3", format="n3")

    def tearDown(self):
        """Close the graph and delete the scratch directory."""
        import shutil  # local import: only needed for cleanup

        try:
            self.g.close()
        finally:
            tmp_dir = getattr(self, "_tmp_dir", None)
            if tmp_dir:
                shutil.rmtree(tmp_dir, ignore_errors=True)

    def testPychinko(self):
        """Build one rule per ``log:implies`` triple and run the interpreter.

        The previous version also downloaded ``http://eikeon.com/`` into a
        second graph that was immediately discarded in favour of ``self.g``;
        that dead network fetch has been removed.
        """
        rules = [
            terms.Rule(list(patterns(s)), list(patterns(o)), (s, p, o))
            for s, p, o in self.g.triples((None, LOG.implies, None))
        ]
        interp = Interpreter(rules)
        interp.addFacts(set(facts(self.g)), initialSet=True)
        interp.run()
class GraphTest(unittest.TestCase):
    """Smoke tests for adding, removing and iterating triples of a graph."""

    backend = 'default'
    path = 'store'

    def setUp(self):
        """Open the graph and seed it with one triple kept as ``remove_me``."""
        self.store = graph = Graph(store=self.backend)
        graph.open(self.path)
        seed = (BNode(), RDFS.label, Literal("remove_me"))
        self.remove_me = seed
        graph.add(seed)

    def tearDown(self):
        """Close the graph opened in ``setUp``."""
        self.store.close()

    def testAdd(self):
        """Adding a labelled blank node must not raise."""
        self.store.add((BNode(), RDFS.label, Literal("foo")))

    def testRemove(self):
        """Remove the seeded triple, then everything via the wildcard."""
        graph = self.store
        graph.remove(self.remove_me)
        wildcard = (None, None, None)
        graph.remove(wildcard)

    def testTriples(self):
        """Iterating the graph must yield unpackable 3-tuples."""
        for triple in self.store:
            subject, predicate, obj = triple
class RDFTestCase(unittest.TestCase):
    """RDF/XML round trip: serialize a small FOAF graph and parse it back."""

    backend = 'default'  # store name passed to Graph(store=...)
    path = 'store'       # configuration passed to Graph.open()

    def setUp(self):
        """Create and open the graph and bind the ``dc``/``foaf`` prefixes."""
        self.store = Graph(store=self.backend)
        self.store.open(self.path)
        # The Dublin Core namespace was previously bound with a doubled
        # scheme ("http://http://purl.org/..."), which is not the DC URI.
        self.store.bind("dc", "http://purl.org/dc/elements/1.1/")
        self.store.bind("foaf", "http://xmlns.com/foaf/0.1/")

    def tearDown(self):
        """Close the graph opened in ``setUp``."""
        self.store.close()

    def addDonna(self):
        """Add a FOAF person (type, nick, name) on a fresh blank node.

        The blank node is kept as ``self.donna``.
        """
        self.donna = donna = BNode()
        self.store.add((donna, RDF.type, FOAF["Person"]))
        self.store.add((donna, FOAF["nick"], Literal("donna")))
        self.store.add((donna, FOAF["name"], Literal("Donna Fales")))

    def testRDFXML(self):
        """The graph re-parsed from its pretty-xml form must be isomorphic."""
        self.addDonna()
        g = Graph()
        g.parse(StringInputSource(self.store.serialize(format="pretty-xml")))
        # assertEqual replaces the deprecated assertEquals alias; the strict
        # comparison against True is kept on purpose.
        self.assertEqual(self.store.isomorphic(g), True)
class GraphTestCase(unittest.TestCase):
    """Exercises add/remove/pattern matching on a graph of seven triples."""

    store_name = 'default'  # store name passed to Graph(store=...)
    path = None             # open() configuration; a temp dir when unset
    slowtest = True

    def setUp(self):
        """Open the graph and create the URIs shared by the tests."""
        self.graph = Graph(store=self.store_name)
        # Create a scratch directory only when no path was configured, and
        # remember it so tearDown can delete it again.
        self._tmp_dir = None
        if not self.path:
            self._tmp_dir = mkdtemp()
            self.path = self._tmp_dir
        self.graph.open(self.path)

        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

    def tearDown(self):
        """Close the graph and delete the scratch directory, if any."""
        import shutil  # local import: only needed for cleanup

        try:
            self.graph.close()
        finally:
            tmp_dir = getattr(self, "_tmp_dir", None)
            if tmp_dir:
                shutil.rmtree(tmp_dir, ignore_errors=True)

    def _stuff(self):
        """Return the seven triples shared by addStuff() and removeStuff()."""
        tarek, michel, bob = self.tarek, self.michel, self.bob
        likes, hates = self.likes, self.hates
        pizza, cheese = self.pizza, self.cheese
        return [
            (tarek, likes, pizza),
            (tarek, likes, cheese),
            (michel, likes, pizza),
            (michel, likes, cheese),
            (bob, likes, cheese),
            (bob, hates, pizza),
            (bob, hates, michel),  # gasp!
        ]

    def addStuff(self):
        """Add the seven fixture triples to the graph."""
        for triple in self._stuff():
            self.graph.add(triple)

    def removeStuff(self):
        """Remove the seven fixture triples from the graph."""
        for triple in self._stuff():
            self.graph.remove(triple)

    def testAdd(self):
        """Adding the fixture triples must not raise."""
        self.addStuff()

    def testRemove(self):
        """Adding and then removing the fixture triples must not raise."""
        self.addStuff()
        self.removeStuff()

    def testTriples(self):
        """Check the match count of every wildcard pattern over the fixture."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        triples = self.graph.triples
        Any = None  # None acts as the wildcard in a triple pattern

        expected_counts = [
            # unbound subjects
            ((Any, likes, pizza), 2),
            ((Any, hates, pizza), 1),
            ((Any, likes, cheese), 3),
            ((Any, hates, cheese), 0),
            # unbound objects
            ((michel, likes, Any), 2),
            ((tarek, likes, Any), 2),
            ((bob, hates, Any), 2),
            ((bob, likes, Any), 1),
            # unbound predicates
            ((michel, Any, cheese), 1),
            ((tarek, Any, cheese), 1),
            ((bob, Any, pizza), 1),
            ((bob, Any, michel), 1),
            # unbound subject, objects
            ((Any, hates, Any), 2),
            ((Any, likes, Any), 5),
            # unbound predicates, objects
            ((michel, Any, Any), 2),
            ((bob, Any, Any), 3),
            ((tarek, Any, Any), 2),
            # unbound subjects, predicates
            ((Any, Any, pizza), 3),
            ((Any, Any, cheese), 3),
            ((Any, Any, michel), 1),
            # all unbound
            ((Any, Any, Any), 7),
        ]

        self.addStuff()
        for pattern, count in expected_counts:
            # The pattern is included in the message to identify a failure.
            self.assertEqual(len(list(triples(pattern))), count,
                             "unexpected match count for %r" % (pattern,))

        self.removeStuff()
        self.assertEqual(len(list(triples((Any, Any, Any)))), 0)

    def testStatementNode(self):
        """A ``Statement`` must work as subject and be retrievable again."""
        graph = self.graph

        from rdflib.Statement import Statement
        c = URIRef("http://example.org/foo#c")
        r = URIRef("http://example.org/foo#r")
        s = Statement((self.michel, self.likes, self.pizza), c)
        graph.add((s, RDF.value, r))
        self.assertEqual(r, graph.value(s, RDF.value))
        self.assertEqual(s, graph.value(predicate=RDF.value, object=r))

    def testGraphValue(self):
        """A ``GraphValue`` must work as subject and as object of a triple."""
        from rdflib.Graph import GraphValue

        graph = self.graph

        alice = URIRef("alice")
        bob = URIRef("bob")
        pizza = URIRef("pizza")
        cheese = URIRef("cheese")

        # g1 and g2 hold the same three triples, added in a different order.
        g1 = Graph()
        g1.add((alice, RDF.value, pizza))
        g1.add((bob, RDF.value, cheese))
        g1.add((bob, RDF.value, pizza))

        g2 = Graph()
        g2.add((bob, RDF.value, pizza))
        g2.add((bob, RDF.value, cheese))
        g2.add((alice, RDF.value, pizza))

        gv1 = GraphValue(store=graph.store, graph=g1)
        gv2 = GraphValue(store=graph.store, graph=g2)
        graph.add((gv1, RDF.value, gv2))
        v = graph.value(gv1)
        self.assertEqual(gv2, v)
        graph.remove((gv1, RDF.value, gv2))

    def testConnected(self):
        """``connected()`` is True for the fixture, False after adding an island."""
        graph = self.graph
        self.addStuff()
        self.assertEqual(True, graph.connected())

        jeroen = URIRef("jeroen")
        unconnected = URIRef("unconnected")

        # This triple shares no node with the fixture triples.
        graph.add((jeroen, self.likes, unconnected))

        self.assertEqual(False, graph.connected())
class StoreTestCase(unittest.TestCase):
    """
    Test case for testing store performance... probably should be
    something other than a unit test... but for now we'll add it as a
    unit test.
    """

    store = 'default'  # store name passed to Graph(store=...)

    def setUp(self):
        """Open the store under test, load the input graph, then pause gc."""
        self.graph = Graph(store=self.store)
        self._tmp_dir = None  # set only when a scratch directory is created
        if self.store == "MySQL":
            from test.mysql import configString
            from rdflib.store.MySQL import MySQL
            path = configString
            MySQL().destroy(path)
        else:
            path = self._tmp_dir = mkdtemp()
        self.graph.open(path, create=True)
        self.input = Graph()
        self.input.parse("http://eikeon.com")

        # The collector is switched off only after everything that can fail
        # has succeeded: tearDown does not run when setUp raises, so
        # disabling it earlier could leave gc off for the rest of the run.
        self.gcold = gc.isenabled()
        gc.collect()
        gc.disable()

    def tearDown(self):
        """Close the graph, restore gc and delete the scratch directory."""
        import shutil  # local import: only needed for cleanup

        try:
            self.graph.close()
        finally:
            if self.gcold:
                gc.enable()
            tmp_dir = getattr(self, "_tmp_dir", None)
            if tmp_dir:
                shutil.rmtree(tmp_dir, ignore_errors=True)
            del self.graph

    def _emit(self, text):
        """Write *text* to stdout without appending a newline."""
        import sys  # local import: only needed for progress output

        sys.stdout.write(text)

    def testTime(self):
        """Print the store name followed by the input and random timings."""
        number = 1
        # The writes below produce the same text as the former Python 2
        # ``print x,`` statements: items separated by single spaces.
        self._emit("%s\n" % self.store)
        self._emit("input: ")
        for _i in itertools.repeat(None, number):
            self._testInput()
        self._emit("random: ")
        for _i in itertools.repeat(None, number):
            self._testRandom()
        self._emit(".\n")

    def _testRandom(self):
        """Time adding as many random triples as the input graph has triples."""
        number = len(self.input)
        store = self.graph

        def add_random():
            s = random_uri()
            p = random_uri()
            o = random_uri()
            store.add((s, p, o))

        it = itertools.repeat(None, number)
        t0 = time()
        for _i in it:
            add_random()
        t1 = time()
        self._emit("%.3g " % (t1 - t0))

    def _testInput(self):
        """Time adding every triple of the input graph to the store once."""
        number = 1
        store = self.graph

        def add_from_input():
            for t in self.input:
                store.add(t)

        it = itertools.repeat(None, number)
        t0 = time()
        for _i in it:
            add_from_input()
        t1 = time()
        self._emit("%.3g " % (t1 - t0))
class GraphManager:
    """
    GraphManager takes a list of RDF and OWL files, merges them, and
    provides convenience methods for extracting data from the (combined)
    graph.

    Note: It contains GOLD specific function calls. Also, when the
    documentation refers to 'object' this should be interpreted as the
    object node of a (subject, predicate, object) triple and not 'object'
    in the sense of OOP. Likewise 'subject' means the subject node.
    """

    # todo: implement multiple constructors
    def __init__(self, graphs):
        """
        Parse every given RDF/OWL source into one merged graph.

        :type graphs: list
        :param graphs: a list of RDF/OWL file locations; a single location
            may also be passed as a plain string
        """
        self.g = Graph()

        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so a
        # single location is recognised whichever string type it uses
        # instead of being iterated character by character.
        if isinstance(graphs, (str, type(u""))):
            graphs = [graphs]
        for source in graphs:
            self.g.parse(source)

        # for testing - todo: load all namespaces from the merged graph
        self.owl = Namespace("http://www.w3.org/2002/07/owl#")
        self.rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
        self.biblio = Namespace("http://www.linguistics-ontology.org/bibliography/bibliography.owl#")
        self.goldbib = Namespace("http://www.linguistics-ontology.org/bibliography/gold-bibliography.rdf#")
        self.gold = Namespace("http://purl.org/linguistics/gold/")
        self.rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
        self.bibtex = Namespace("http://purl.oclc.org/NET/nknouf/ns/bibtex#")
        self.person = Namespace("http://www.linguistics-ontology.org/bibliography/person.rdf#")

        # get namespaces from graph (previously fetched and then discarded,
        # which left this attribute permanently empty)
        self.namespaces = list(self.g.namespaces())

    def __del__(self):
        """
        Destructor: close the underlying graph.
        """
        # ``g`` is missing when Graph() itself failed in the constructor.
        graph = getattr(self, "g", None)
        if graph is not None:
            graph.close()

    def _objects(self, subject, predicate):
        """Return the objects of all (subject, predicate, ?) triples as a list."""
        return list(self.g.objects(subject, predicate))

    def _instances(self, rdf_class):
        """Return the subjects of all (?, rdf:type, rdf_class) triples as a list."""
        return list(self.g.subjects(self.rdf["type"], rdf_class))

    def getPredicateObjects(self, subject):
        """
        Return a list of (predicate, object) tuples for a given subject.

        :type subject: str
        :param subject: a URI
        :rtype: list
        :return: the (predicate, object) tuples of the subject's triples
        """
        return list(self.g.predicate_objects(subject))

    def getClasses(self):
        """
        Return all subjects of (?, rdf:type, owl:Class) triples.

        :rtype: list
        :return: the matching subject nodes
        """
        return self._instances(self.owl["Class"])

    def getBibtexPublicationType(self):
        """
        Return the subjects of (?, rdf:about, goldbib:Barnes1984) triples.

        :rtype: list
        """
        # The entry used to be looked up through ``self.x``, an attribute
        # that is never defined, so every call raised AttributeError.
        # NOTE(review): goldbib is assumed to be the namespace of the
        # hard-coded "Barnes1984" entry, and ``rdf:about`` is an RDF/XML
        # attribute that may never occur as a predicate -- confirm both.
        return list(self.g.subjects(self.rdf["about"], self.goldbib["Barnes1984"]))

    def getDescription(self, subject):
        """
        Return the rdfs:comment objects of one subject.

        :param subject: a URI
        :type subject: str
        :rtype: list
        """
        return self._objects(subject, self.rdfs["comment"])

    def getDescriptions(self, l):
        """
        Return the rdfs:comment objects of each subject in a list.

        :param l: a list of subject nodes
        :type l: list
        :rtype: list
        :return: one list of comment objects per subject, in input order
        """
        return [self._objects(node, self.rdfs["comment"]) for node in l]

    def getCitations(self, subject):
        """
        Return the objects of (subject, biblio:hasCitation, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasCitation"])

    def getCitationsPages(self, subject):
        """
        Return the objects of (subject, biblio:hasPageInformation, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasPageInformation"])

    def getCitationsBibtexEntry(self, subject):
        """
        Return the objects of (subject, biblio:hasEntry, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasEntry"])

    def getCitationsBibtexTitle(self, subject):
        """
        Return the objects of (subject, biblio:hasBookTitle, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasBookTitle"])

    def getCitationsBibtexAuthors(self, subject):
        """
        Return the objects of (subject, biblio:hasAuthorList, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        # double-check this
        return self._objects(subject, self.biblio["hasAuthorList"])

    # bibtex:hasYear
    def getCitationsBibtexPublicationYear(self, subject):
        """
        Return the objects of (subject, biblio:hasYear, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        # This method used to ignore ``subject``, print the author list of
        # bibtex:Book and return None (leftover debugging code).
        # NOTE(review): the biblio namespace is used like in the other
        # getCitationsBibtex* methods, whereas getYear() queries
        # bibtex:hasYear -- confirm which vocabulary the data uses.
        return self._objects(subject, self.biblio["hasYear"])

    # bibtex:hasPublisher
    def getCitationsBibtexPublisher(self, subject):
        """
        Return the objects of (subject, biblio:hasPublisher, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasPublisher"])

    # bibtex:hasAddress
    def getCitationsBibtexAddress(self, subject):
        """
        Return the objects of (subject, biblio:hasAddress, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasAddress"])

    # rdfs:comment
    def getCitationsBibtexComment(self, subject):
        """
        Return the objects of (subject, rdfs:comment, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.rdfs["comment"])

    # biblio:hasEditorList
    def getCitationsBibtexEditors(self, subject):
        """
        Return the objects of (subject, biblio:hasEditorList, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasEditorList"])

    # bibtex:hasSeries
    def getCitationsBibtexSeries(self, subject):
        """
        Return the objects of (subject, biblio:hasSeries, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasSeries"])

    # bibtex:hasVolume
    def getCitationsBibtexVolume(self, subject):
        """
        Return the objects of (subject, biblio:hasVolume, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasVolume"])

    # bibtex:hasJournal
    def getCitationsBibtexJournal(self, subject):
        """
        Return the objects of (subject, biblio:hasJournal, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasJournal"])

    # <bibtex:Article rdf:about="Barnes1984">
    # <bibtex:InCollection rdf:about="Noonan1994">
    # <bibtex:Book rdf:about="Miller1965">
    def getCitationsBibtexPublicationType(self, object):
        """
        Return the subjects of (?, rdf:about, ref) triples, where ``ref`` is
        the part after the first '#' of the first element of ``object``.

        The reference and the matching subjects are also printed.

        :type object: list
        :param object: a sequence whose first element is a URI string
        :rtype: list
        """
        uri = object[0]
        ref = uri.partition("#")[2]
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("REF: %s" % ref)

        # return list of URIs instead of rdflib.URIRef objects (for now)
        subjects = list(self.g.subjects(self.rdf["about"], ref))
        print("SUBJECTS: %s" % (subjects,))
        return subjects

    # test this - doesn't seem to work in the interpreter
    def getLabel(self, subject):
        """
        Return the objects of (subject, rdfs:label, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.rdfs["label"])

    def getLabels(self, l):
        """
        Return the rdfs:label objects of each subject in a list.

        :type l: list
        :param l: a list of subjects (strs) as URIs
        :rtype: list
        :return: one list of label objects per subject, in input order
        """
        return [self._objects(node, self.rdfs["label"]) for node in l]

    def getSubClasses(self, subject):
        """
        Return the objects of (subject, rdfs:subClassOf, ?) triples, i.e.
        the classes the subject is declared to be a subclass of.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.rdfs["subClassOf"])

    def getCitaton(self, c):
        """
        Unimplemented placeholder; always returns None.

        :param c: currently unused
        """
        # The previous body built a list of every object in the graph and
        # threw it away; that wasted work has been dropped.
        return None

    # doesn't work -- fix this
    def isDefinedBy(self):
        """
        Return the (subject, object) tuples of all
        (?, rdfs:isDefinedBy, ?) triples.

        :rtype: list
        """
        return list(self.g.subject_objects(self.rdfs["isDefinedBy"]))

    # BEGIN BIBMANAGER METHODS

    def printGraph(self):
        """
        Print the serialized graph.
        """
        print(self.g.serialize())

    def getAuthorList(self, subject):
        """
        Return the objects of (subject, biblio:hasAuthorList, ?) triples.

        :param subject: a URI
        :type subject: str
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasAuthorList"])

    def getEditorList(self, subject):
        """
        Return the objects of (subject, biblio:hasEditorList, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasEditorList"])

    def getSeq(self, subject):
        """
        Return the objects of (subject, rdf:_1, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.rdf["_1"])

    def getSeqs(self, bnode):
        """
        Return the (predicate, object) tuples of the given node.

        :type bnode: str
        :param bnode: the node to look up
        :rtype: list
        """
        return list(self.g.predicate_objects(bnode))

    def getYear(self, subject):
        """
        Return the objects of (subject, bibtex:hasYear, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.bibtex["hasYear"])

    def getTitle(self, subject):
        """
        Return the objects of (subject, bibtex:hasTitle, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.bibtex["hasTitle"])

    def getJournal(self, subject):
        """
        Return the objects of (subject, bibtex:hasJournal, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.bibtex["hasJournal"])

    def getVolume(self, subject):
        """
        Return the objects of (subject, bibtex:hasVolume, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.bibtex["hasVolume"])

    def getPublisher(self, subject):
        """
        Return the objects of (subject, bibtex:hasPublisher, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.bibtex["hasPublisher"])

    def getAddress(self, subject):
        """
        Return the objects of (subject, bibtex:hasAddress, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.bibtex["hasAddress"])

    def getChapter(self, subject):
        """
        Return the objects of (subject, bibtex:hasChapter, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.bibtex["hasChapter"])

    def getPages(self, subject):
        """
        Return the objects of (subject, bibtex:hasPages, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.bibtex["hasPages"])

    def getBookTitle(self, subject):
        """
        Return the objects of (subject, bibtex:hasBookTitle, ?) triples.

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.bibtex["hasBookTitle"])

    def getArticles(self):
        """
        Return the subjects of (?, rdf:type, bibtex:Article) triples.

        :rtype: list
        """
        return self._instances(self.bibtex["Article"])

    def getInCollections(self):
        """
        Return the subjects of (?, rdf:type, bibtex:InCollection) triples.

        :rtype: list
        """
        return self._instances(self.bibtex["InCollection"])

    def getInProceedings(self):
        """
        Return the subjects of (?, rdf:type, bibtex:InProceedings) triples.

        :rtype: list
        """
        return self._instances(self.bibtex["InProceedings"])

    def getBooks(self):
        """
        Return the subjects of (?, rdf:type, bibtex:Book) triples.

        :rtype: list
        """
        return self._instances(self.bibtex["Book"])

    def getMiscs(self):
        """
        Return the subjects of (?, rdf:type, bibtex:Misc) triples.

        :rtype: list
        """
        return self._instances(self.bibtex["Misc"])

    def getTechReports(self):
        """
        Return the subjects of (?, rdf:type, bibtex:TechReport) triples.

        :rtype: list
        """
        return self._instances(self.bibtex["TechReport"])

    def getPhdTheses(self):
        """
        Return the subjects of (?, rdf:type, bibtex:PhdThesis) triples.

        :rtype: list
        """
        return self._instances(self.bibtex["PhdThesis"])

    def __len__(self):
        """
        Return length of (merged) graph(s).

        :rtype: int
        :return: length of the graph
        """
        return len(self.g)

    def getLength(self):
        """
        Return length of (merged) graph(s); same value as ``len(self)``.

        :rtype: int
        :return: length of the graph
        """
        return len(self.g)

    def getAuthors(self, subject):
        """
        Return the objects of (subject, biblio:hasAuthorList, ?) triples,
        e.g.:

        - s: rdf:about="http://www.linguistics-ontology.org/bibliography/gold-bibliography.rdf#Leman1980"
        - p: biblio:hasAuthorList

        :type subject: str
        :param subject: a URI
        :rtype: list
        """
        return self._objects(subject, self.biblio["hasAuthorList"])

    # not yet tested
    def getSubjectObjects(self, predicate):
        """
        Return a list of (subject, object) tuples for the given predicate.

        The predicate may be passed as either a uri or as a list/tuple of
        (uri_prefix, concept).

        :type predicate: str
        :param predicate: a URI, or a (uri_prefix, concept) pair
        :rtype: list
        """
        # Previously this compared ``type(predicate) == []`` (never true)
        # and called an undefined module-level ``subject_objects``, so every
        # call raised NameError.
        if isinstance(predicate, (list, tuple)):
            # NOTE(review): the pair is assumed to be a namespace URI plus
            # a local name that together form the predicate URI -- confirm.
            uri_prefix, concept = predicate[0], predicate[1]
            predicate = Namespace(uri_prefix)[concept]
        return list(self.g.subject_objects(predicate))
class GraphManager: """ GraphManager takes a list of RDF and OWL files, merges them, and provides convenience methods for extracting data from the (combined) graph. Note: It contains GOLD specific funtion calls. Also, when the documentation refers to 'object' this should be interpreted by the user as the object node within the graph (subject predicate object) entity relations and not 'object' in the sense of OOP. Also, the term 'subject' should be considered the 'subject' node. """ # todo: implement multiple constructors # def __init__(self, graphs): def __init__(self, graphs): """ Constructor takes a list of URLs that point to RDF/OWL files. :type graphs: list :param graphs: a list RDF/OWL files """ self.g = Graph() if type(graphs) == str: self.g.parse(graphs) else: for i in range(0, len(graphs)): self.g.parse(graphs[i]) # for testing - todo: load all namespaces from a merged graph in the code below self.owl = Namespace("http://www.w3.org/2002/07/owl#") self.rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#") self.biblio = Namespace( "http://www.linguistics-ontology.org/bibliography/bibliography.owl#" ) self.goldbib = Namespace( "http://www.linguistics-ontology.org/bibliography/gold-bibliography.rdf#" ) self.gold = Namespace("http://purl.org/linguistics/gold/") self.rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#") self.bibtex = Namespace("http://purl.oclc.org/NET/nknouf/ns/bibtex#") self.person = Namespace( "http://www.linguistics-ontology.org/bibliography/person.rdf#") # get namespaces from graph self.namespaces = [] namespaces = self.g.namespaces() def __del__(self): """ Desconstructor for the Graph object """ self.g.close() def getPredicateObjects(self, subject): """ Return a list of predicate and object tuples for a given subject :type subject: str :param subject: a URI :rtype: list :return: a list of predicate and object tuples that match the """ return list(self.g.predicate_objects(subject)) def getClasses(self): """ Returns all subjects 
that match rdfs:type predicates and owl:Class objects. Returns a list of type rdflib.Literal.Literal. :rtype: list :return: a list of rdflib.URIRef.URIRef objects """ return list(self.g.subjects(self.rdf["type"], self.owl["Class"])) def getBibtexPublicationType(self): """ Returns a list of rdflib.Literal.Literal objects :rtype: list """ return list(self.g.subjects(self.rdf["about"], self.x["Barnes1984"])) def getDescription(self, subject): """ Gets each class' rdfs:comment. Takes a list of subjects. Returns a list of lists of rdflib.Literals, which are of type list, e.g. [rdflib.Literal('Verbalizer is the class of category changing units that change nouns into verbs.', language=None, datatype=None)]. Print on an rdflib.Literal prints the URI. :param subject: a URI :type subject: str :rtype: list """ return list(self.g.objects(subject, self.rdfs["comment"])) def getDescriptions(self, l): """ Gets each class' rdfs:comment. Takes a list of subjects. Returns a list of lists of rdflib.Literals, which are of type list, e.g. 
[rdflib.Literal('Verbalizer is the class of category changing units that change nouns into verbs.', language=None, datatype=None)] :param l: a list of subject nodes :type l: list :rtype: list """ results = [] for i in l: relations = list(self.g.objects(i, self.rdfs["comment"])) results.append(relations) return results def getCitations(self, subject): """ Return a list of objects where the subject parameter matches the biblio["hasCitation"] predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasCitation"])) def getCitationsPages(self, subject): """ Return a list of objects where the subject parameter matches the biblio["hasPageInformation"] predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasPageInformation"])) def getCitationsBibtexEntry(self, subject): """ Return a list of objects where the subject parameter matches the biblio["hasEntry"] predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasEntry"])) def getCitationsBibtexTitle(self, subject): """ Get object matches of a graph given a tuple (subject, biblio:hasBookTitle). The subject passed it needs to be a string and not a rdflib.URI object. :type subject: str :param subject: subject passed it needs to be a string and not a rdflib.URI object :rtype: list """ return list(self.g.objects(subject, self.biblio["hasBookTitle"])) def getCitationsBibtexAuthors(self, subject): """ Get object matches of a graph given a tuple (subject, biblio:hasAuthorList). The subject passed it needs to be a string and not a rdflib.URI object. 
:type subject: str :param subject: subject passed it needs to be a string and not a rdflib.URI object :rtype: list """ # return self.g.objects(subject, self.biblio["hasAuthorList"])) # double-check this return list(self.g.objects(subject, self.biblio["hasAuthorList"])) # bibtex:hasYear def getCitationsBibtexPublicationYear(self, subject): """ Get object matches of a graph given a tuple (subject, biblio:hasBookTitle). The subject passed it needs to be a string and not a rdflib.URI object. :type subject: str :param subject: subject passed it needs to be a string and not a rdflib.URI object :rtype: list """ print list( self.g.objects(self.bibtex["Book"], self.biblio["hasAuthorList"])) # bibtex:hasPublisher def getCitationsBibtexPublisher(self, subject): """ Return a list of objects where the subject parameter matches the biblio["hasPublisher"] predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasPublisher"])) # bibtex:hasAddress def getCitationsBibtexAddress(self, subject): """ Return a list of objects where the subject parameter matches the biblio["hasAddress"] predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasAddress"])) # rdfs:comment def getCitationsBibtexComment(self, subject): """ Return a list of objects where the subject parameter matches the rdfs["comment"] predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.rdfs["comment"])) # biblio:hasEditorList def getCitationsBibtexEditors(self, subject): """ Return a list of objects where the subject parameter matches the biblio["hasEditorList"] predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasEditorList"])) # bibtex:hasSeries def getCitationsBibtexSeries(self, subject): """ Return a list of objects where the subject parameter matches the biblio["hasSeries"] 
predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasSeries"])) # bibtex:hasVolume def getCitationsBibtexVolume(self, subject): """ Return a list of objects where the subject parameter matches the biblio["hasVolume"] predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasVolume"])) # bibtex:hasJournal def getCitationsBibtexJournal(self, subject): """ Return a list of objects where the subject parameter matches the biblio["hasJournal"] predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasJournal"])) # <bibtex:Article rdf:about="Barnes1984"> # <bibtex:InCollection rdf:about="Noonan1994"> # <bibtex:Book rdf:about="Miller1965"> def getCitationsBibtexPublicationType(self, object): uri = object[0] # print "OBJECT:", uri ref_split = uri.partition("#") ref = ref_split[2] print "REF:", ref # return list of URIs instead of rdflib.URIRef objects (for now) subjects = list(self.g.subjects(self.rdf["about"], ref)) print "SUBJECTS:", subjects return subjects # test this - doesn't seem to work in the interpreter def getLabel(self, subject): """ Return a list of objects where the subject parameter matches the rdfs["label"] predicate :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.rdfs["label"])) def getLabels(self, l): """ Return a list of objects where the subject parameter matches the rdfs["label"] predicate :type l: list :param l: a list of subject (strs) as URIs :rtype: list """ results = [] for i in l: relations = list(self.g.objects(i, self.rdfs["label"])) results.append(relations) return results def getSubClasses(self, subject): """ Returns a list of rdflib.URIRef objects that contains the subClassOf relations for the passed in subject. 
:type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.rdfs["subClassOf"])) def getCitaton(self, c): objects = list(self.g.objects()) pass # doesn't work -- fix this def isDefinedBy(self): """ Returns a list of of (rdflib.URIRef, rdflib.URIRef) tuples for (subject, object) defined by the predicate rdfs:isDefinedBy. :rtype: list """ return list(self.g.subject_objects(self.rdfs["isDefinedBy"])) # BEGIN BIBMANAGER METHODS def printGraph(self): """ Print the graph """ print self.g.serialize() def getAuthorList(self, subject): """ Return a list of subjects that match rdf:type biblio:hasAuthorList :param subject: a URI :type subject: str :rtype: list """ return list(self.g.objects(subject, self.biblio["hasAuthorList"])) def getEditorList(self, subject): """ Return a list of subjects that match rdf:type biblio:hasEditorList :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasEditorList"])) def getSeq(self, subject): """ Returns a sequence of objects that match the subject parameter and rdf:_1 :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.rdf["_1"])) def getSeqs(self, bnode): """ Return a list of predicates_object tuples that have a bnode :type bnode: str :param bnode: a URI :rtype: list """ return list(self.g.predicate_objects(bnode)) def getYear(self, subject): """ Return a list of subjects that match rdf:type bibtex:hasYear :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.bibtex["hasYear"])) def getTitle(self, subject): """ Return a list of subjects that match rdf:type bibtex:Title :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.bibtex["hasTitle"])) def getJournal(self, subject): """ Return a list of subjects that match rdf:type bibtex:hasJournal :type subject: str :param subject: a URI :rtype: list """ return 
list(self.g.objects(subject, self.bibtex["hasJournal"])) def getVolume(self, subject): """ Return a list of subjects that match rdf:type bibtex:hasVolume :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.bibtex["hasVolume"])) def getPublisher(self, subject): """ Return a list of subjects that match rdf:type bibtex:hasPublisher :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.bibtex["hasPublisher"])) def getAddress(self, subject): """ Return a list of subjects that match rdf:type bibtex:hasAddress :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.bibtex["hasAddress"])) def getChapter(self, subject): """ Return a list of subjects that match rdf:type bibtex:hasChapter :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.bibtex["hasChapter"])) def getPages(self, subject): """ Return a list of subjects that match rdf:type bibtex:hasPages :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.bibtex["hasPages"])) def getBookTitle(self, subject): """ Return a list of subjects that match rdf:type bibtex:hasBookTitle :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.bibtex["hasBookTitle"])) def getArticles(self): """ Return a list of subjects that match rdf:type bibtex:Article :rtype: list """ return list(self.g.subjects(self.rdf["type"], self.bibtex["Article"])) def getInCollections(self): """ Return a list of subjects that match rdf:type bibtex:InCollection :rtype: list """ return list( self.g.subjects(self.rdf["type"], self.bibtex["InCollection"])) def getInProceedings(self): """ Return a list of subjects that match rdf:type bibtex:InProceedings :rtype: list """ return list( self.g.subjects(self.rdf["type"], self.bibtex["InProceedings"])) def getBooks(self): """ Return a list of subjects 
that match rdf:type bibtex:Book :rtype: list """ return list(self.g.subjects(self.rdf["type"], self.bibtex["Book"])) def getMiscs(self): """ Return a list of subjects that match rdf:type bibtex:Misc :rtype: list """ return list(self.g.subjects(self.rdf["type"], self.bibtex["Misc"])) def getTechReports(self): """ Return a list of subjects that match rdf:type bibtex:TechReport :rtype: list """ return list( self.g.subjects(self.rdf["type"], self.bibtex["TechReport"])) def getPhdTheses(self): """ Return a list of subjects that match rdf:type bibtex:PhDThesis :rtype: list """ return list(self.g.subjects(self.rdf["type"], self.bibtex["PhdThesis"])) def __len__(self): """ Return length of (merged) graph(s). :rtype: int :return: length of the graph """ return len(self.g) def getLength(self): """ Return length of (merged) graph(s). :rtype: int :return: length of the graph """ return len(self.g) def getAuthors(self, subject): """ Return a list of authors that match the subject and biblio:hasAuthorList, e.g.: - s: rdf:about="http://www.linguistics-ontology.org/bibliography/gold-bibliography.rdf#Leman1980" - p: biblio:hasAuthorList :type subject: str :param subject: a URI :rtype: list """ return list(self.g.objects(subject, self.biblio["hasAuthorList"])) # not yet tested def getSubjectObjects(self, predicate): """ Returns a list of (subject, object) tuples for the given predicate. The predicate may be passed as either a uri or as a tuple of (uri_prefix, concept). :type predicate: str :param predicate: a URI :rtype: list """ if type(predicate) == []: return list(subject_objects(predicate[0], predicate[1])) else: return list(subject_objects(predicate))
class GraphTestCase(unittest.TestCase):
    """
    Exercise adding, removing and querying triples on a Graph.

    ``store_name`` is passed to ``Graph(store=...)`` and ``path`` is passed
    to ``Graph.open``.  When ``path`` is left as None each test opens the
    store in a temporary directory that is removed again in tearDown.
    """
    store_name = 'default'
    path = None
    slowtest = True

    def setUp(self):
        """Open the graph and create the URIRef fixtures used by the tests."""
        self.graph = Graph(store=self.store_name)
        # Create a scratch directory only when no path was configured, and
        # remember it so that tearDown removes exactly what was created here.
        self._tmp_dir = None
        if not self.path:
            self._tmp_dir = mkdtemp()
            self.path = self._tmp_dir
        self.graph.open(self.path)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

    def tearDown(self):
        """Close the graph and delete the temporary directory, if any."""
        try:
            self.graph.close()
        finally:
            # getattr keeps subclasses that override setUp working.
            tmp_dir = getattr(self, '_tmp_dir', None)
            if tmp_dir is not None:
                import shutil
                shutil.rmtree(tmp_dir, ignore_errors=True)

    def _stuff(self):
        """Return the seven fixture triples shared by addStuff/removeStuff."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        return [
            (tarek, likes, pizza),
            (tarek, likes, cheese),
            (michel, likes, pizza),
            (michel, likes, cheese),
            (bob, likes, cheese),
            (bob, hates, pizza),
            (bob, hates, michel),  # gasp!
        ]

    def addStuff(self):
        """Add every fixture triple to the graph."""
        for triple in self._stuff():
            self.graph.add(triple)

    def removeStuff(self):
        """Remove every fixture triple from the graph."""
        for triple in self._stuff():
            self.graph.remove(triple)

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testTriples(self):
        """Check match counts for every bound/unbound pattern combination."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        asserte = self.assertEqual
        triples = self.graph.triples
        Any = None  # None acts as the wildcard in a triple pattern

        def count(pattern):
            return len(list(triples(pattern)))

        self.addStuff()

        # unbound subjects
        asserte(count((Any, likes, pizza)), 2)
        asserte(count((Any, hates, pizza)), 1)
        asserte(count((Any, likes, cheese)), 3)
        asserte(count((Any, hates, cheese)), 0)

        # unbound objects
        asserte(count((michel, likes, Any)), 2)
        asserte(count((tarek, likes, Any)), 2)
        asserte(count((bob, hates, Any)), 2)
        asserte(count((bob, likes, Any)), 1)

        # unbound predicates
        asserte(count((michel, Any, cheese)), 1)
        asserte(count((tarek, Any, cheese)), 1)
        asserte(count((bob, Any, pizza)), 1)
        asserte(count((bob, Any, michel)), 1)

        # unbound subject, objects
        asserte(count((Any, hates, Any)), 2)
        asserte(count((Any, likes, Any)), 5)

        # unbound predicates, objects
        asserte(count((michel, Any, Any)), 2)
        asserte(count((bob, Any, Any)), 3)
        asserte(count((tarek, Any, Any)), 2)

        # unbound subjects, predicates
        asserte(count((Any, Any, pizza)), 3)
        asserte(count((Any, Any, cheese)), 3)
        asserte(count((Any, Any, michel)), 1)

        # all unbound
        asserte(count((Any, Any, Any)), 7)
        self.removeStuff()
        asserte(count((Any, Any, Any)), 0)

    def testStatementNode(self):
        """A Statement can be used as a subject and is returned by value()."""
        graph = self.graph

        from rdflib.Statement import Statement
        c = URIRef("http://example.org/foo#c")
        r = URIRef("http://example.org/foo#r")
        s = Statement((self.michel, self.likes, self.pizza), c)
        graph.add((s, RDF.value, r))
        self.assertEqual(r, graph.value(s, RDF.value))
        self.assertEqual(s, graph.value(predicate=RDF.value, object=r))

    def testGraphValue(self):
        """Two GraphValues holding the same triples compare equal."""
        from rdflib.Graph import GraphValue

        graph = self.graph

        alice = URIRef("alice")
        bob = URIRef("bob")
        pizza = URIRef("pizza")
        cheese = URIRef("cheese")

        g1 = Graph()
        g1.add((alice, RDF.value, pizza))
        g1.add((bob, RDF.value, cheese))
        g1.add((bob, RDF.value, pizza))

        # Same three triples as g1, added in a different order.
        g2 = Graph()
        g2.add((bob, RDF.value, pizza))
        g2.add((bob, RDF.value, cheese))
        g2.add((alice, RDF.value, pizza))

        gv1 = GraphValue(store=graph.store, graph=g1)
        gv2 = GraphValue(store=graph.store, graph=g2)
        graph.add((gv1, RDF.value, gv2))
        v = graph.value(gv1)
        self.assertEqual(gv2, v)
        graph.remove((gv1, RDF.value, gv2))

    def testConnected(self):
        """connected() turns False once an unrelated triple is added."""
        graph = self.graph
        self.addStuff()
        self.assertEqual(True, graph.connected())

        jeroen = URIRef("jeroen")
        unconnected = URIRef("unconnected")

        graph.add((jeroen, self.likes, unconnected))

        self.assertEqual(False, graph.connected())
class StoreTestCase(unittest.TestCase):
    """
    Test case for testing store performance... probably should be
    something other than a unit test... but for now we'll add it as a
    unit test.

    ``store`` is passed to ``Graph(store=...)``.  Timings are written to
    standard output on a single line after the store name.
    """
    store = 'default'

    def setUp(self):
        """Disable gc, open the store under test and load the input graph."""
        # Remember the collector state so it can be put back afterwards;
        # presumably it is switched off to keep collections out of the
        # measured timings.
        self.gcold = gc.isenabled()
        gc.collect()
        gc.disable()
        self._tmp_dir = None
        try:
            self.graph = Graph(store=self.store)
            if self.store == "MySQL":
                from test.mysql import configString
                from rdflib.store.MySQL import MySQL
                path = configString
                MySQL().destroy(path)
            else:
                path = self._tmp_dir = mkdtemp()
            self.graph.open(path, create=True)
            self.input = Graph()
            self.input.parse("http://eikeon.com")
        except Exception:
            # unittest does not call tearDown when setUp fails, so undo the
            # process-wide changes here before re-raising.
            self._restore()
            raise

    def tearDown(self):
        """Close the store, then restore gc and delete the temp directory."""
        try:
            self.graph.close()
        finally:
            self._restore()
            del self.graph

    def _restore(self):
        """Re-enable gc if it was on before and remove the temp directory."""
        if self.gcold:
            gc.enable()
        tmp_dir = getattr(self, "_tmp_dir", None)
        if tmp_dir is not None:
            import shutil
            shutil.rmtree(tmp_dir, ignore_errors=True)
            self._tmp_dir = None

    def _write(self, text):
        """Write ``text`` to standard output without adding a newline."""
        import sys
        sys.stdout.write(text)

    def testTime(self):
        """Time loading the parsed input and the same number of random triples."""
        number = 1
        self._write("%s\n" % (self.store,))
        # The pieces below are space-separated on one line, matching what
        # the former trailing-comma print statements produced.
        self._write("input:")
        for _ in itertools.repeat(None, number):
            self._testInput()
        self._write(" random:")
        for _ in itertools.repeat(None, number):
            self._testRandom()
        self._write(" .\n")

    def _testRandom(self):
        """Add len(self.input) random triples and write the elapsed seconds."""
        number = len(self.input)
        store = self.graph

        def add_random():
            s = random_uri()
            p = random_uri()
            o = random_uri()
            store.add((s, p, o))

        it = itertools.repeat(None, number)
        t0 = time()
        for _i in it:
            add_random()
        t1 = time()
        self._write(" %.3g" % (t1 - t0))

    def _testInput(self):
        """Add every triple of self.input once and write the elapsed seconds."""
        number = 1
        store = self.graph

        def add_from_input():
            for t in self.input:
                store.add(t)

        it = itertools.repeat(None, number)
        t0 = time()
        for _i in it:
            add_from_input()
        t1 = time()
        self._write(" %.3g" % (t1 - t0))