def getGraph():
    """Build and return the main graph, seeded from trains.n3 on disk.

    The parsed triples are placed in the TT['disk#context'] context.
    """
    g = ConjunctiveGraph()
    g.parse("trains.n3", format="n3", publicID=TT['disk#context'])
    return g
def readGraphs(): g = ConjunctiveGraph() # this file should only be reread when it changes g.parse("../config.n3", format="n3") dl = [] startTime = time.time() for uri in [ "http://bang:9055/graph", "http://bang:9069/graph", "http://bang:9070/graph", "http://bang:9072/bang-9002/processStatus", "http://bang:9072/bang/processStatus", "http://bang:9072/dash/processStatus", "http://bang:9072/slash-11000/processStatus", "http://bang:9072/slash/processStatus", "http://bang:9072/star/processStatus", "http://bang:9075/graph", ]: # this ought to not reparse the ones that are 304 not modified d = getPage(uri) def done(trig, uri): g.addN(parseTrig(trig)) print "%s done in %.02fms" % (uri, 1000 * (time.time() - startTime)) d.addCallback(done, uri) dl.append(d) return defer.DeferredList(dl).addCallback(lambda result: g)
def check_propeties(vocabfile):
    """Check a vocabulary file for identifier and hasFormat conformance.

    Returns (conforms, properties) on success, or {} if the file
    cannot be parsed (preserved from the original interface).
    """
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except Exception:
        # unparsable vocabulary: signal failure with an empty dict
        return {}
    # Locate the vocabulary's subject: prefer dc/dcterms title, then
    # fall back to dc/dcterms creator.
    # BUG FIX: original read `subject = none` (undefined name).
    subject = None
    for s in graph.subjects(namespaces['dc']['title'], None):
        subject = s
    if not subject:
        for s in graph.subjects(namespaces['dcterms']['title'], None):
            subject = s
    if not subject:
        for s in graph.subjects(namespaces['dc']['creator'], None):
            subject = s
    if not subject:
        for s in graph.subjects(namespaces['dcterms']['creator'], None):
            subject = s
    properties = {}
    # Identifier: collect dc:identifier values, falling back to
    # dcterms:identifier.  BUG FIX: the original overwrote the list with
    # a single node, then stored `[id]` which referenced the loop
    # variable (or the `id` builtin when no identifiers existed).
    identifier = []
    for ident in graph.objects(subject, namespaces['dc']['identifier']):
        identifier.append(ident)
    if not identifier:
        for ident in graph.objects(subject, namespaces['dcterms']['identifier']):
            identifier.append(ident)
    properties['identifier'] = identifier
    # hasFormat: each format node must carry rdf:value, rdfs:label and
    # rdfs:type on its dc:format bnode for the vocabulary to conform.
    # BUG FIX: flags are reset per format (prop1 was initialized True,
    # which made the rdf:value check vacuous).
    properties['format'] = []
    for fm in graph.objects(subject, namespaces['dcterms']['hasFormat']):
        properties['format'].append(fm)
        prop1 = prop2 = prop3 = False
        for b in graph.objects(fm, namespaces['dc']['format']):
            for _value in graph.objects(b, namespaces['rdf']['value']):
                prop1 = True
            for _value in graph.objects(b, namespaces['rdfs']['label']):
                prop2 = True
            for _value in graph.objects(b, namespaces['rdfs']['type']):
                prop3 = True
        if 'all' not in properties:
            properties['all'] = prop1 and prop2 and prop3
        else:
            properties['all'] = properties['all'] and prop1 and prop2 and prop3
    # Conforms only when an identifier, at least one format, and all
    # format details are present.
    conforms = False
    if identifier and properties['format'] and properties['all']:
        conforms = True
    return (conforms, properties)
def main(specloc="file:index.rdf"):
    """The meat and potatoes: Everything starts here.

    Parses the spec at *specloc*, renders the A-Z list and term details,
    and prints the filled-in HTML template to stdout.
    """
    m = Graph()
    m.parse(specloc)
    classlist, proplist = specInformation(m)
    # Build HTML list of terms.
    azlist = buildazlist(classlist, proplist)
    # Generate Term HTML
    termlist = "<h3>Classes and Properties (full detail)</h3>"
    termlist += "<div class='termdetails'>"
    termlist += docTerms('Class', classlist, m)
    termlist += docTerms('Property', proplist, m)
    termlist += "</div>"
    # Generate RDF from original namespace.
    u = urllib.urlopen(specloc)
    try:
        rdfdata = u.read()
    finally:
        # BUG FIX: the original never closed the URL handle
        u.close()
    # BUG FIX: str.replace returns a new string; the original discarded
    # the result, so the XML declaration was never stripped.
    rdfdata = rdfdata.replace("""<?xml version="1.0"?>""", "")
    # wip.template is a template file for the spec, python-style % escapes
    # for replaced sections.
    f = open("../0.1/template.html", "r")
    try:
        template = f.read()
    finally:
        f.close()
    print(template % (azlist.encode("utf-8"), termlist.encode("utf-8"), rdfdata))
def check_type_definitions(vocabfile):
    """Return True iff every typed term in the vocabulary carries an
    rdfs:isDefinedBy statement; False when the file cannot be parsed."""
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except:
        return False
    # Choose the RDFS or OWL candidate-class list depending on whether
    # any subject is typed with the RDFS test URI.
    rdfs_marker = URIRef(vocab_type_definitions_test['rdfs'])
    marked = []
    for subj in graph.subjects(namespaces['rdf']['type'], rdfs_marker):
        marked.append(subj)
    candidates = vocab_type_definitions_rdfs if marked else vocab_type_definitions_owl
    all_defined = True
    for cls in candidates:
        seen = []
        for subj in graph.subjects(namespaces['rdf']['type'], cls):
            if subj in seen:
                continue
            seen.append(subj)
            # a term counts as defined if at least one isDefinedBy exists
            has_definition = False
            for _ in graph.objects(subj, namespaces['rdfs']['isDefinedBy']):
                has_definition = True
            all_defined = all_defined and has_definition
    return all_defined
def main():
    """Main Function

    Simple command-line procedure for web2rdf: fetch RDF for the URI
    given as the single argument and commit it to a local sqlite store.
    """
    if len(sys.argv) != 2:
        print "Must call with a URI parameter."
        print "Usage: %s uriSrc" % sys.argv[0]
        return
    uri = sys.argv[1]
    # Get the RDF
    wrdf = Web2Rdf(uri)
    rdf = wrdf.getRdf()
    if not rdf:
        print "No RDF returned!"
        return False
    print "Got RDF..."
    rdf = rdfString(rdf)
    # Open Storage
    print "Opening store..."
    db = "./testdb.sqlite"
    rstore = RdfStore('sqlite', db)
    rstore.open()
    # Parse the fetched RDF into a named graph and persist it
    print "Storing..."
    graph = Graph(rstore.get(), identifier = URIRef("http://slashdot/Test2"))
    #graph.parse("example.rdf")
    graph.parse(rdf, publicID=uri)
    graph.commit()
def check_type_definitions(vocabfile):
    """Return True iff every typed subject in the vocabulary carries an
    rdfs:isDefinedBy statement; False if the file cannot be parsed."""
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except:
        # unparsable vocabulary file
        return False
    all_definitions = True
    testo = vocab_type_definitions_test['rdfs']
    subjects = []
    subs = graph.subjects(namespaces['rdf']['type'], URIRef(testo))
    for s in subs:
        subjects.append(s)
    # If anything is typed with the RDFS test URI, check against the
    # RDFS class list, otherwise fall back to the OWL list.
    if subjects:
        objects = vocab_type_definitions_rdfs
    else:
        objects = vocab_type_definitions_owl
    for o in objects:
        subs = graph.subjects(namespaces['rdf']['type'], o)
        done = []
        for s in subs:
            # skip subjects already checked for this class
            if s in done:
                continue
            done.append(s)
            # a subject counts as defined if it has >= 1 isDefinedBy
            definition = False
            vals = graph.objects(s, namespaces['rdfs']['isDefinedBy'])
            for val in vals:
                definition = True
            all_definitions = all_definitions and definition
    return all_definitions
def check_propeties(vocabfile):
    """Check a vocabulary file for identifier and hasFormat conformance.

    Returns (conforms, properties) on success, or {} if the file
    cannot be parsed (preserved from the original interface).
    """
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except Exception:
        # unparsable vocabulary: signal failure with an empty dict
        return {}
    # Locate the vocabulary's subject: prefer dc/dcterms title, then
    # fall back to dc/dcterms creator.
    # BUG FIX: original read `subject = none` (undefined name).
    subject = None
    for s in graph.subjects(namespaces['dc']['title'], None):
        subject = s
    if not subject:
        for s in graph.subjects(namespaces['dcterms']['title'], None):
            subject = s
    if not subject:
        for s in graph.subjects(namespaces['dc']['creator'], None):
            subject = s
    if not subject:
        for s in graph.subjects(namespaces['dcterms']['creator'], None):
            subject = s
    properties = {}
    # Identifier: collect dc:identifier values, falling back to
    # dcterms:identifier.  BUG FIX: the original overwrote the list with
    # a single node, then stored `[id]` which referenced the loop
    # variable (or the `id` builtin when no identifiers existed).
    identifier = []
    for ident in graph.objects(subject, namespaces['dc']['identifier']):
        identifier.append(ident)
    if not identifier:
        for ident in graph.objects(subject, namespaces['dcterms']['identifier']):
            identifier.append(ident)
    properties['identifier'] = identifier
    # hasFormat: each format node must carry rdf:value, rdfs:label and
    # rdfs:type on its dc:format bnode for the vocabulary to conform.
    # BUG FIX: flags are reset per format (prop1 was initialized True,
    # which made the rdf:value check vacuous).
    properties['format'] = []
    for fm in graph.objects(subject, namespaces['dcterms']['hasFormat']):
        properties['format'].append(fm)
        prop1 = prop2 = prop3 = False
        for b in graph.objects(fm, namespaces['dc']['format']):
            for _value in graph.objects(b, namespaces['rdf']['value']):
                prop1 = True
            for _value in graph.objects(b, namespaces['rdfs']['label']):
                prop2 = True
            for _value in graph.objects(b, namespaces['rdfs']['type']):
                prop3 = True
        if 'all' not in properties:
            properties['all'] = prop1 and prop2 and prop3
        else:
            properties['all'] = properties['all'] and prop1 and prop2 and prop3
    # Conforms only when an identifier, at least one format, and all
    # format details are present.
    conforms = False
    if identifier and properties['format'] and properties['all']:
        conforms = True
    return (conforms, properties)
def parse(self, result):
    """Parse a textual query result into an RDF graph.

    @param result: text result
    @return: rdf graph
    """
    parsed = ConjunctiveGraph()
    source = StringInputSource(result)
    parsed.parse(source)
    return parsed
def parse(self, result):
    """ Parse query result

    @param result: text result
    @return: rdf graph
    """
    graph = ConjunctiveGraph()
    # no format given: rdflib's default parser is used here
    graph.parse(StringInputSource(result))
    return graph
def testModel(self):
    # parse the module-level N3 fixture `input` into a conjunctive graph
    g = ConjunctiveGraph()
    g.parse(StringInputSource(input), format="n3")
    i = 0
    for s, p, o in g:
        # count statements whose subject is itself a (quoted) graph
        if isinstance(s, Graph):
            i += 1
    self.assertEquals(i, 3)
    self.assertEquals(len(list(g.contexts())), 13)
    g.close()
def testModel(self):
    # build a graph from the shared N3 test fixture `input`
    g = ConjunctiveGraph()
    g.parse(StringInputSource(input), format="n3")
    i = 0
    for s, p, o in g:
        # formula subjects appear as Graph instances when iterating
        if isinstance(s, Graph):
            i += 1
    self.assertEquals(i, 3)
    self.assertEquals(len(list(g.contexts())), 13)
    g.close()
def testModel(self):
    # Debug variant of testModel: prints the formula-subject count
    # instead of asserting (the assertions are commented out below).
    print 'Probando la función testModel\n_____________________________'
    g = ConjunctiveGraph()
    g.parse(StringInputSource(input), format="n3")
    i = 0
    for s, p, o in g:
        # count statements whose subject is a (quoted) graph
        if isinstance(s, Graph):
            i += 1
            print i
    #self.assertEquals(i, 3)
    #self.assertEquals(len(list(g.contexts())), 13)
    #print g.serialize()
    g.close()
def del_vocab_from_creator(userid, vocab):
    """Remove the mediator triples for *vocab* from the creator record
    of *userid* and rewrite the record file.

    Returns True on success, False when the creator file is missing.
    """
    # BUG FIX: the original built this path inline twice and later
    # referenced an unbound name `creatorfile`; bind it once here.
    creatorfile = os.path.join(ag.creatorsdir, '%s.rdf' % userid)
    if not os.path.isfile(creatorfile):
        return False
    graph = Graph()
    graph.parse(creatorfile)
    # BUG FIX: the original referenced the undefined name `vocabprefix`;
    # the `vocab` parameter is the prefix for the vocabulary URI.
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s" % vocab)
    for s, p, o in graph.triples((URIRef(vocab_uri), namespaces['dcterms']['mediator'], None)):
        graph.remove((s, p, o))
    rdf_str = graph.serialize()
    f = codecs.open(creatorfile, 'w', 'utf-8')
    try:
        f.write(rdf_str)
    finally:
        f.close()
    return True
def del_vocab_from_creator(userid, vocab):
    """Remove the mediator triples for *vocab* from the creator record
    of *userid* and rewrite the record file.

    Returns True on success, False when the creator file is missing.
    """
    # BUG FIX: bind the record path once; the original later wrote to an
    # unbound name `creatorfile`.
    creatorfile = os.path.join(ag.creatorsdir, '%s.rdf' % userid)
    if not os.path.isfile(creatorfile):
        return False
    graph = Graph()
    graph.parse(creatorfile)
    # BUG FIX: the original used the undefined name `vocabprefix`; the
    # `vocab` parameter supplies the vocabulary prefix.
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s" % vocab)
    for s, p, o in graph.triples(
            (URIRef(vocab_uri), namespaces['dcterms']['mediator'], None)):
        graph.remove((s, p, o))
    rdf_str = graph.serialize()
    f = codecs.open(creatorfile, 'w', 'utf-8')
    try:
        f.write(rdf_str)
    finally:
        f.close()
    return True
def _discover_meta(self, homepage, candidate):
    """Record *candidate* as discovered metadata for *homepage* and,
    when its URL looks like an RDF document, try to parse it.

    Raises RDFDiscoveringMalformedError on parse failure and
    RDFDiscoveringBrokenLinkError when the URL cannot be fetched.
    """
    self.triples.push_meta(homepage, candidate)
    self.stats.count_rdf()
    logging.debug("Analyzing '%s'" % candidate)
    # FIXME: Not a good idea, think about it.
    if re.match(r".*\.rdf$", candidate) is not None:
        graph = ConjunctiveGraph()
        try:
            graph.parse(candidate)
        except (SAXParseException, RdflibParserError), e:
            # document fetched but not valid RDF
            self.stats.count_invalidrdf()
            raise RDFDiscoveringMalformedError(str(e), candidate)
        except urllib2.URLError:
            # document could not be retrieved at all
            self.stats.count_invalidrdf()
            raise RDFDiscoveringBrokenLinkError(candidate)
def testQueryingMore(self):
    # each entry of self.results carries a FOAF document URI in column 0
    for result in self.results:
        uri = result[0]
        g = ConjunctiveGraph()
        g.parse(uri)
        # find the person the document is primarily about
        query = Parse("""
            SELECT ?person
            WHERE {
                    <%s> foaf:primaryTopic ?person .
                    ?person rdf:type foaf:Person .
                  }
            """ % uri )
        queryResults = g.query(query, initNs=NSbindings).serialize('python')
        # only assert when the document actually yields a match
        if (len(queryResults)>0):
            self.assertEquals(str(queryResults[0]), "http://www.wikier.org/foaf#wikier")
class TestSPARQLToldBNodes(unittest.TestCase):
    """Told-BNode semantics: a BNode named directly in a query should
    match only itself, not act as a wildcard variable."""
    def setUp(self):
        NS = u"http://example.org/"
        # two anonymous subjects: one with :prop, one typed rdfs:Class
        self.graph = ConjunctiveGraph()
        self.graph.parse(StringInputSource("""
        @prefix : <http://example.org/> .
        @prefix rdf: <%s> .
        @prefix rdfs: <%s> .
        [ :prop :val ].
        [ a rdfs:Class ]."""%(RDF.RDFNS,RDFS.RDFSNS)), format="n3")
    def testToldBNode(self):
        # leave `s` bound to the last rdf:type subject (a BNode)
        for s,p,o in self.graph.triples((None,RDF.type,None)):
            pass
        # embed the BNode's N3 name directly into the query
        query = """SELECT ?obj WHERE { %s ?prop ?obj }"""%s.n3()
        print query
        rt = self.graph.query(query)
        self.failUnless(len(rt) == 1,"BGP should only match the 'told' BNode by name (result set size: %s)"%len(rt))
def testQueryingMore(self):
    # column 0 of every result row is a FOAF document URI
    for result in self.results:
        uri = result[0]
        g = ConjunctiveGraph()
        g.parse(uri)
        # query for the document's primary topic, typed foaf:Person
        query = Parse("""
            SELECT ?person
            WHERE {
                    <%s> foaf:primaryTopic ?person .
                    ?person rdf:type foaf:Person .
                  }
            """ % uri)
        queryResults = g.query(query, initNs=NSbindings).serialize('python')
        # assert only for documents that yield a binding
        if (len(queryResults) > 0):
            self.assertEquals(str(queryResults[0]), "http://www.wikier.org/foaf#wikier")
def main(specloc, template, mode="spec"):
    """The meat and potatoes: Everything starts here.

    Parses the schema at *specloc*, renders an A-Z list (full "spec"
    mode or simple "list" mode) plus term details, and returns the
    template with those two sections substituted in.
    """
    m = Graph()
    m.parse(specloc)
    # m = RDF.Model()
    # p = RDF.Parser()
    # p.parse_into_model(m, specloc)
    classlist, proplist = specInformation(m)
    if mode == "spec":
        # Build HTML list of terms.
        azlist = buildazlist(classlist, proplist)
    elif mode == "list":
        # Build simple <ul> list of terms.
        azlist = build_simple_list(classlist, proplist)
    # Generate Term HTML
    # termlist = "<h3>Classes and Properties (full detail)</h3>"
    termlist = docTerms('Class',classlist,m)
    termlist += docTerms('Property',proplist,m)
    # Generate RDF from original namespace.
    u = urllib.urlopen(specloc)
    rdfdata = u.read()
    rdfdata = re.sub(r"(<\?xml version.*\?>)", "", rdfdata)
    rdfdata = re.sub(r"(<!DOCTYPE[^]]*]>)", "", rdfdata)
    # NOTE(review): str.replace returns a new string, so this call is a
    # no-op; harmless here since rdfdata is not used afterwards.
    rdfdata.replace("""<?xml version="1.0"?>""", "")
    # print template % (azlist.encode("utf-8"), termlist.encode("utf-8"), rdfdata.encode("ISO-8859-1"))
    #template = re.sub(r"^#format \w*\n", "", template)
    #template = re.sub(r"\$VersionInfo\$", owlVersionInfo(m).encode("utf-8"), template)
    # NOTE: This works with the assumtpion that all "%" in the template are escaped to "%%" and it
    # contains the same number of "%s" as the number of parameters in % ( ...parameters here... )
    print "AZlist",azlist
    print "Termlist",termlist
    #xxx
    template = template % (azlist.encode("utf-8"), termlist.encode("utf-8"));
    # template += "<!-- specification regenerated at " + time.strftime('%X %x %Z') + " -->"
    return template
def load_store(files):
    """ Takes a collection of RDF file paths and loads them into the
    MySQL-backed triple store, one named context per file. """
    try:
        store = plugin.get("MySQL", Store)("rdflib_db")
        store.open(config["rdflib.config"])
        graph = ConjunctiveGraph(store)
        # iterate through files and load them into the graph
        # BUG FIX: the original iterated an undefined name `fl`;
        # the parameter is `files`.
        for fpath in files:
            graph.parse(fpath, format=get_format(fpath), publicID=context_uri(fpath))
            print(fpath + " loaded.")
        # save triples to store
        graph.commit()
        graph.close()
    except Exception:
        # BUG FIX: the original bare `except:` also referenced `exit`
        # without calling it (a no-op); report the failure instead.
        print("=== error opening RDF store ===")
def testSPARQLNotEquals():
    """FILTER (?val != 1) must select only :bar (whose rdf:value is 2)."""
    NS = u"http://example.org/"
    graph = ConjunctiveGraph()
    data = """
       @prefix : <http://example.org/> .
       @prefix rdf: <%s> .
       :foo rdf:value 1.
       :bar rdf:value 2.""" % RDF.RDFNS
    graph.parse(StringInputSource(data), format="n3")
    sparql = """SELECT ?node
                WHERE {
                        ?node rdf:value ?val.
                        FILTER (?val != 1)
                       }"""
    rt = graph.query(sparql, initNs={'rdf': RDF.RDFNS}, DEBUG=False)
    expected = URIRef("http://example.org/bar")
    for row in rt:
        # the only surviving binding must be :bar
        assert row[0] == expected
def testSPARQLNotEquals():
    # FILTER (?val != 1) should exclude :foo and keep only :bar
    NS = u"http://example.org/"
    graph = ConjunctiveGraph()
    graph.parse(StringInputSource("""
       @prefix : <http://example.org/> .
       @prefix rdf: <%s> .
       :foo rdf:value 1.
       :bar rdf:value 2.""" % RDF.RDFNS), format="n3")
    rt = graph.query("""SELECT ?node
                        WHERE {
                                ?node rdf:value ?val.
                                FILTER (?val != 1)
                               }""",
                     initNs={'rdf': RDF.RDFNS},
                     DEBUG=False)
    for row in rt:
        item = row[0]
        assert item == URIRef("http://example.org/bar")
def getRdfXml(rdf): n3 = "" # Append the RDF namespace and print the prefix namespace mappings rdf['namespaces']['xh1'] = "http://www.w3.org/1999/xhtml/vocab#" rdf['namespaces']['rdf'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" for prefix, uri in rdf['namespaces'].items(): n3 += "@prefix %s: <%s> .\n" % (prefix, uri) # Print each subject-based triple to the screen triples = rdf['triples'] processed = [] # Get all of the non-bnode subjects nonBnodeSubjects = getNonBnodeSubjects(triples) # Get all of the bnode subjects bnodeSubjects = getBnodeSubjects(triples) for subject in nonBnodeSubjects: subjectTriples = getTriplesBySubject(subject, triples) #print "PROCESSING NB SUBJECT:", subjectTriples if(subject not in processed): n3 += tripleToN3(subjectTriples, processed, triples) processed.append(subject) for subject in bnodeSubjects: subjectTriples = getTriplesBySubject(subject, triples) #print "PROCESSING BN SUBJECT:", subject if(subject not in processed): n3 += bnodeToN3(subjectTriples, processed, triples) n3 += " .\n" #print n3 g = ConjunctiveGraph() g.parse(StringIO(n3), format="n3") rdfxml = g.serialize() return rdfxml
def store_ontology( default_graph_uri ):
    """Parse the ontology at *default_graph_uri* straight into a
    MySQL-backed graph and commit it; returns the triple count."""
    # graph_local = Graph( identifier = URIRef( default_graph_uri ) )
    print "Preparing to read graph: " + default_graph_uri + "\n"
    # graph_local.parse( default_graph_uri )
    # print "Read Graph saving to database"
    configString = "host=dbclass,user=brandon,password=b2341x,db=portal"
    store = plugin.get( 'MySQL', Store)('rdflib')
    # create the store on first use, otherwise just open it
    rt = store.open(configString, create=False)
    if rt == NO_STORE:
        store.open(configString, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is not valid"
    print "Opened Store"
    graph = Graph(store, identifier = URIRef( default_graph_uri ))
    # graph = Graph( identifier = URIRef( default_graph_uri ))
    # for stmt in graph_local:
    #     graph.add( stmt )
    graph.parse( default_graph_uri )
    print "Committing Graph to database\n"
    graph.commit()
    print "Saved Graph to database\n"
    store.close()
    return len( graph )
def testBasic(DEBUG = False):
    """Run the DAWG-style example suite: for every .rq query file, read
    its manifest, parse expected results, evaluate the query against the
    source graph, and compare bindings (order-insensitively)."""
    from glob import glob
    from sre import sub
    for testFile in glob('data/examples/*.rq'):#glob('data/*/*.rq'):
        store = plugin.get(STORE,Store)()
        bootStrapStore(store)
        store.commit()
        prefix = testFile.split('.rq')[-1]
        # the manifest may be N3 or Turtle; prefer manifest.n3
        manifestPath = '/'.join(testFile.split('/')[:-1]+['manifest.n3'])
        manifestPath2 = '/'.join(testFile.split('/')[:-1]+['manifest.ttl'])
        queryFileName = testFile.split('/')[-1]
        store = plugin.get(STORE,Store)()
        store.open(configString,create=False)
        assert len(store) == 0
        manifestG=ConjunctiveGraph(store)
        if not os.path.exists(manifestPath):
            assert os.path.exists(manifestPath2)
            manifestPath = manifestPath2
        manifestG.default_context.parse(open(manifestPath),publicID=TEST_BASE,format='n3')
        # pull this query's metadata (source, name, comment, expected result)
        manifestData = \
            manifestG.query(
                PARSED_MANIFEST_QUERY,
                initBindings={'?query' : TEST_BASE[queryFileName]},
                initNs=manifestNS,
                DEBUG = False)
        store.rollback()
        store.close()
        for source,testCaseName,testCaseComment,expectedRT in manifestData:
            # resolve manifest-relative paths to filesystem paths
            if expectedRT:
                expectedRT = '/'.join(testFile.split('/')[:-1]+[expectedRT.replace(TEST_BASE,'')])
            if source:
                source = '/'.join(testFile.split('/')[:-1]+[source.replace(TEST_BASE,'')])
            testCaseName = testCaseComment and testCaseComment or testCaseName
            print "## Source: %s ##"%source
            print "## Test: %s ##"%testCaseName
            print "## Result: %s ##"%expectedRT
            #Expected results
            if expectedRT:
                store = plugin.get(STORE,Store)()
                store.open(configString,create=False)
                resultG=ConjunctiveGraph(store).default_context
                # if DEBUG:
                #     print "###"*10
                #     print "parsing: ", open(expectedRT).read()
                #     print "###"*10
                assert len(store) == 0
                print "## Parsing (%s) ##"%(expectedRT)
                # result files come in several formats; try each parser
                if not trialAndErrorRTParse(resultG,expectedRT,DEBUG):
                    if DEBUG:
                        print "Unexpected result format (for %s), skipping"%(expectedRT)
                    store.rollback()
                    store.close()
                    continue
                if DEBUG:
                    print "## Done .. ##"
                # reconstruct the expected variable bindings from the
                # parsed result-set graph
                rtVars = [rtVar for rtVar in resultG.objects(None,RESULT_NS.resultVariable)]
                bindings = []
                resultSetNode = resultG.value(predicate=RESULT_NS.value,object=RESULT_NS.ResultSet)
                for solutionNode in resultG.objects(resultSetNode,RESULT_NS.solution):
                    bindingDict = dict([(key,None) for key in rtVars])
                    for bindingNode in resultG.objects(solutionNode,RESULT_NS.binding):
                        value = resultG.value(subject=bindingNode,predicate=RESULT_NS.value)
                        name = resultG.value(subject=bindingNode,predicate=RESULT_NS.variable)
                        bindingDict[name] = value
                    bindings.append(tuple([bindingDict[vName] for vName in rtVars]))
                if DEBUG:
                    print "Expected bindings: ", bindings
                    print open(expectedRT).read()
                store.rollback()
                store.close()
            # negative-syntax tests must fail to parse
            if testFile.startswith('data/NegativeSyntax'):
                try:
                    query = open(testFile).read()
                    p = Parse(query,DEBUG)
                except:
                    continue
                else:
                    raise Exception("Test %s should have failed!"%testFile)
            if testFile in tests2Skip:
                print "Skipping test (%s)"%testCaseName
                continue
            query = open(testFile).read()
            print "### %s (%s) ###"%(testCaseName,testFile)
            print query
            p = Parse(query,DEBUG_PARSE)
            if DEBUG:
                print p
            if EVALUATE and source:
                if DEBUG:
                    print "### Source Graph: ###"
                    print open(source).read()
                store = plugin.get(STORE,Store)()
                store.open(configString,create=False)
                g=ConjunctiveGraph(store)
                try:
                    g.parse(open(source),format='n3')
                except:
                    print "Unexpected data format (for %s), skipping"%(source)
                    store.rollback()
                    store.close()
                    continue
                #print store
                rt = g.query(p,DEBUG = DEBUG)
                if expectedRT:
                    # compare exactly first, then order-insensitively
                    if rt != bindings and Set([Set(i) for i in rt]) != Set([Set(i) for i in bindings]):#unorderedComparison(rt,bindings):
                        print "### Expected Result (%s) ###"%expectedRT
                        pprint(bindings)
                        print "### Actual Results ###"
                        pprint(rt)
                        raise Exception("### TEST FAILED!: %s ###"%testCaseName)
                    else:
                        print "### TEST PASSED!: %s ###"%testCaseName
                store.rollback()
class sparql_funcs():
    """Query helpers for the Open Correspondence letter data."""
    def __init__(self):
        self.g = Graph('IOMemory')
        #self.endpoint = "http://www.opencorrespondence.org/data/endpoint/rdf"
        #self.g.bind('geo', geo)

    def find_places(self):
        '''
        Function to get the distinct locations mentioned in the headers of the letters.
        These are the locations from which Dickens wrote.
        TODO: Parsing the letters to get the places mentioned in them
        '''
        row = set()
        o = OFS()
        for b in o.list_buckets():
            # pull each bucket's endpoint document into the graph
            endpoint = o.get_stream(b, "endpoint")
            self.g.parse(endpoint)
            for s, _, n in self.g.triples((None, dublin_core['title'], None)):
                loc_key = urllib.unquote(
                    n.replace("http://www.opencorrespondence.org/place/resource/", "").replace("/rdf", ""))
                row.add(self.tidy_location(loc_key))
        return row

    def tidy_location(self, location):
        '''
        Function to tidy up some of the places where they refer to the same place
        TODO: prob need some language processing to make this scalable
        '''
        ret_location = ''
        if location == 'Office Of "household Words,':
            ret_location = "Household Words"
        elif location == '"household Words" Office':
            ret_location = "Household Words"
        elif location == '"household Words"':
            ret_location = "Household Words"
        elif location == 'H. W. Office':
            ret_location = "Household Words"
        elif location == '"household Words,':
            ret_location = "Household Words"
        elif location == '"all The Year Round" Office':
            ret_location = "All The Year Round"
        elif location == 'Office Of "all The Year Round,':
            ret_location = "All The Year Round"
        elif location == "Gad's Hill Place":
            ret_location = "Gads Hill"
        elif location == "Gad's Hill":
            ret_location = "Gads Hill"
        elif location == "Gad's Hill Place, Higham":
            ret_location = "Gads Hill"
        elif location == "Tavistock House, Tavistock Square":
            ret_location = "Tavistock House"
        elif location == "London, Tavistock House":
            ret_location = "Tavistock House"
        elif location == "Tavistock House, London":
            ret_location = "Tavistock House"
        else:
            # strip U.S. markers and any remaining dots
            if "U.s." in location:
                location = str(location).replace("U.s", "")
            ret_location = str(location).replace(".", "")
        return ret_location

    def find_correspondents(self):
        '''
        Function to get the distinct correspondents mentioned in the headers
        of the letters.
        '''
        row = set()
        # NOTE(review): self.endpoint is commented out in __init__, so this
        # relies on it being set elsewhere — confirm before calling.
        self.g.parse(self.endpoint)
        for s, _, n in self.g.triples((None, letter['correspondent'], None)):
            loc_key = urllib.unquote(
                n.replace(
                    "http://www.opencorrespondence.org/correspondent/resource/",
                    "").replace("/rdf", ""))
            row.add(loc_key)
        return row

    def get_abstract(self, resource_id):
        """Fetch the English dbpedia abstract for *resource_id*."""
        # BUG FIX: the original read `'http://dbpedia.org/resource/'.resource_id`
        # (attribute access on a str literal -> AttributeError); concatenate.
        self.g.parse('http://dbpedia.org/resource/' + resource_id)
        q = '''
            SELECT * WHERE {
                ?x dbpedia:abstract ?abstract .
                FILTER (lang(?abstract) = 'en')
            }
        '''
        for row in self.g.query(
                q,
                initNs=dict(dbpedia=Namespace("http://dbpedia.org/ontology/")),
                initBindings={}):
            return row[1]

    def query_dates(self, author):
        '''query to identify individual dates to a correspondent'''
        # NOTE(review): the author value is interpolated directly into the
        # SPARQL string — escape/parameterize if author is untrusted.
        q = '''
            SELECT ?date FROM <http://localhost:5000/data/endpoint/rdf> WHERE {
                ?r dc:subject \'''' + author + '''\' .
                ?r dc:date ?date.
            }
        '''
        dates = []
        for row in self.g.query(
                q,
                initNs=dict(letter=Namespace(
                    "http://www.opencorrespondence.org/schema#"),
                    dc=Namespace("http://purl.org/dc/elements/1.1/")),
                initBindings={}):
            date = str(row[0]).split('-')
            if date[0][1:].isdigit():
                dates.append(date[0])
        print(dates)
        # count occurrences of each year
        dic = {}
        for dt in dates:
            dic[dt] = dates.count(dt)
        return dic
def testParse(self):
    """Smoke test: fetch and parse a remote N3 policy document."""
    url = ("http://groups.csail.mit.edu/dig/2005/09/rein/examples/"
           "troop42-policy.n3")
    graph = ConjunctiveGraph()
    graph.parse(url, format="n3")
def schemadoc(uris):
    """Print an HTML documentation page (to stdout) for the RDFS/OWL
    schemas found at *uris*: a Classes section then a Properties section."""
    G = Graph()
    for uri in uris:
        G.parse(uri)
    # page header and inline stylesheet
    print """
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Schema Documentation</title>
<style type="text/css">
body { margin: 1em; font-family: Georgia, sans-serif; }
h1 { font-family: Tahoma, sans-serif; }
h2, h3, h4, h5, h6 { font-family: Arial, sans-serif; }
a { font-weight: bold; color: #036; }
dt.class { margin-top: 0.75em; }
dt.property { margin-top: 0.75em; }
address { padding-top: 0.35em; border-top: 2px solid #369; }
</style>
</head>
<body>
<h1>Schema Documentation</h1>
"""
    # collect every named class (RDFS or OWL) with its superclasses,
    # domain properties and comments
    classes = []
    for metaclass in [RDFS.Class, OWL.Class]:
        for uri in G.subjects(RDF.type, metaclass):
            if not isinstance(uri, URIRef):
                continue
            c = Class(uri)
            c.classes = [Class(u) for u in G.objects(uri, RDFS.subClassOf)
                         if isinstance(u, URIRef)]
            for prop in G.subjects(RDFS.domain, uri):
                p = Property(prop)
                ranges = [Class(u) for u in G.objects(prop, RDFS.range)]
                c.properties.append((p, ranges))
            # c.properties = [Property(u) for u in G.subjects(RDFS.domain, uri)]
            c.comments = [str(s) for s in G.objects(uri, RDFS.comment)]
            classes.append(c)
    print '<h2>Classes</h2>'
    print '<ul>'
    for c in sorted(classes):
        print '<li>'
        print '<dl>'
        print '<dt class="class">'
        sys.stdout.write(c.name())
        # show superclasses in parentheses, if any
        if c.classes:
            o = ', '.join(cls.name(format='text') for cls in sorted(c.classes))
            print '(' + o + ')'
        else:
            print
        print '</dt>'
        for comment in c.comments:
            print '<dd>'
            print comment
            print '</dd>'
        for prop, ranges in sorted(c.properties):
            print '<dd>'
            print '   ' + prop.name()
            if ranges:
                print '   => ' + ', '.join(range.name() for range in ranges)
            print '</dd>'
        print '</dt>'
        print '</li>'
    print '</ul>'
    print '<h2>Properties</h2>'
    # collect plain, functional and inverse-functional properties
    properties = []
    print '<dl>'
    for propclass in [RDF.Property, OWL.FunctionalProperty,
                      OWL.InverseFunctionalProperty]:
        for uri in G.subjects(RDF.type, propclass):
            if not isinstance(uri, URIRef):
                continue
            p = Property(uri)
            properties.append(p)
            p.kind = Class(propclass)
            p.domains = [Class(u) for u in G.objects(uri, RDFS.domain)
                         if isinstance(u, URIRef)]
            p.ranges = [Class(u) for u in G.objects(uri, RDFS.range)
                        if isinstance(u, URIRef)]
            p.comments = [str(s) for s in G.objects(uri, RDFS.comment)]
    for p in sorted(properties):
        print '<dt class="property">'
        print p.name() + ' (' + p.kind.name(format='text') + ')'
        print '</dt>'
        for comment in p.comments:
            print '<dd>'
            print comment
            print '</dd>'
        if p.domains:
            print '<dd>domain: '
            print ', '.join(domain.name() for domain in p.domains)
            print '</dd>'
        if p.ranges:
            print '<dd>range: '
            print ', '.join(range.name() for range in p.ranges)
            print '</dd>'
    print '</dl>'
    print '<address>'
    print 'Generated by <a href="http://inamidst.com/proj/sdoc/"'
    print '>Schemadoc</a>'
    print '</address>'
    print '</body>'
    print '</html>'
# Fragment of a feed-handling method (enclosing def not visible here).
if parse.bozo:
    # feedparser flagged a problem; re-raise to classify it
    try:
        raise parse.bozo_exception
    except (SAXParseException, feedparser.CharacterEncodingUnknown,
            feedparser.NonXMLContentType):
        self.stats.count_invalidfeed()
        raise RSSParsingFeedMalformedError(feed)
    except feedparser.CharacterEncodingOverride:
        # encoding override is tolerable; carry on parsing
        pass
if parse.version:
    graph = ConjunctiveGraph()
    # NOTE(review): `in ("rss10")` is a substring test on a plain string,
    # not tuple membership — works for "rss10" but is fragile.
    if parse.version in ("rss10"):
        logging.debug("Looks like RDF (%s)" % parse.version)
        try:
            graph.parse(feed, format="xml")
        except (SAXParseException, RdflibParserError), e:
            self.stats.count_invalidfeed()
            raise RSSParsingFeedMalformedError(feed)
        self.stats.count_rss1feed()
    elif parse.version in ("rss20", "rss094"):
        logging.debug("Looks like XML (%s)" % parse.version)
        # RSS 2.0/0.94 must first be XSLT-transformed into RDF
        try:
            transformed_feed = self._transform_feed(feed, parse.encoding)
        except Exception:
            # Ignoring all errors
            self.stats.count_invalidfeed()
            raise RSSParsingXSLTError(feed)
        try:
            graph.parse(StringIO(transformed_feed), format="xml")
        except (SAXParseException, RdflibParserError), e:
            self.stats.count_invalidfeed()
def testFileUploadBulk(self):
    """Bulk upload test: create 1500 datasets, POST a zip file to each,
    and verify the manifest RDF, zip round-trip and state JSON every time.
    Timing for each iteration is appended to test_times.log."""
    for i in range(0, 1500):
        """Upload file to dataset - POST file to /silo_name/datasets/dataset_name"""
        # Create a new dataset, check response
        start = datetime.now()
        dataset_id= uuid4().hex
        #dataset_id='TestSubmission%d'%i
        f = open('test_times.log', 'a')
        f.write('%s: Creating and uploading file to dataset %s \n'%(start.isoformat(), dataset_id))
        f.close()
        self.createSubmissionDataset(dataset_id=dataset_id)
        #Access state information
        (resp, respdata) = self.doHTTP_GET(
            resource="states/%s"%dataset_id,
            expect_status=200, expect_reason="OK", expect_type="application/json")
        # Upload zip file, check response
        zipdata = self.uploadSubmissionZipfile(dataset_id=dataset_id, file_to_upload='images.zip', filename='images.zip')
        # log wall-clock time for this create+upload cycle
        end = datetime.now()
        delta = end - start
        time_used = delta.days * 86400 + delta.seconds
        f = open('test_times.log', 'a')
        f.write('    Time taken: %s \n\n'%str(time_used))
        f.close()
        # Access and check list of contents
        (resp, rdfdata) = self.doHTTP_GET(
            resource="datasets/%s"%dataset_id,
            expect_status=200, expect_reason="OK", expect_type="application/rdf+xml")
        # parse the returned manifest and assert the expected triples
        rdfgraph = Graph()
        rdfstream = StringIO(rdfdata)
        rdfgraph.parse(rdfstream)
        subj = URIRef(self.getManifestUri("datasets/%s"%dataset_id))
        base = self.getManifestUri("datasets/%s/"%dataset_id)
        dcterms = "http://purl.org/dc/terms/"
        ore = "http://www.openarchives.org/ore/terms/"
        oxds = "http://vocab.ox.ac.uk/dataset/schema#"
        stype = URIRef(oxds+"DataSet")
        self.assertEqual(len(rdfgraph),12,'Graph length %i' %len(rdfgraph))
        self.failUnless((subj,RDF.type,stype) in rdfgraph, 'Testing submission type: '+subj+", "+stype)
        self.failUnless((subj,URIRef(dcterms+"created"),None) in rdfgraph, 'dcterms:created')
        self.failUnless((subj,URIRef(ore+"aggregates"),URIRef(base+"images.zip")) in rdfgraph)
        self.failUnless((subj,URIRef(dcterms+"identifier"),None) in rdfgraph, 'dcterms:identifier')
        self.failUnless((subj,URIRef(dcterms+"mediator"),None) in rdfgraph, 'dcterms:mediator')
        self.failUnless((subj,URIRef(dcterms+"rights"),None) in rdfgraph, 'dcterms:rights')
        self.failUnless((subj,URIRef(dcterms+"license"),None) in rdfgraph, 'dcterms:license')
        self.failUnless((subj,URIRef(dcterms+"publisher"),None) in rdfgraph, 'dcterms:publisher')
        self.failUnless((subj,URIRef(oxds+"isEmbargoed"),None) in rdfgraph, 'oxds:isEmbargoed')
        self.failUnless((subj,URIRef(oxds+"currentVersion"),'1') in rdfgraph, 'oxds:currentVersion')
        self.failUnless((subj,URIRef(dcterms+"modified"),None) in rdfgraph, 'dcterms:modified')
        # Access and check zip file content
        (resp, zipfile) = self.doHTTP_GET(
            resource="datasets/%s/images.zip"%dataset_id,
            expect_status=200, expect_reason="OK", expect_type="application/zip")
        self.assertEqual(zipdata, zipfile, "Difference between local and remote zipfile!")
        #Access state information and check
        (resp, data) = self.doHTTP_GET(
            resource="states/%s"%dataset_id,
            expect_status=200, expect_reason="OK", expect_type="application/json")
        # version bookkeeping: version 0 is the bare manifest, version 1
        # adds images.zip
        state = data['state']
        parts = data['parts']
        self.assertEqual(len(state.keys()), 12, "States")
        self.assertEqual(state['item_id'], dataset_id, "Submission item identifier")
        self.assertEqual(len(state['versions']), 2, "Two versions")
        self.assertEqual(state['versions'][0], '0', "Version 0")
        self.assertEqual(state['versions'][1], '1', "Version 1")
        self.assertEqual(state['currentversion'], '1', "Current version == 1")
        self.assertEqual(state['rdffileformat'], 'xml', "RDF file type")
        self.assertEqual(state['rdffilename'], 'manifest.rdf', "RDF file name")
        self.assertEqual(state['files']['0'], ['manifest.rdf'], "List should contain just manifest.rdf")
        self.assertEqual(len(state['files']['1']), 2, "List should contain manifest.rdf and images.zip")
        self.assertEqual(len(state['metadata_files']['0']), 0, "metadata_files of version 0")
        self.assertEqual(len(state['metadata_files']['1']), 0, "metadata_files of version 1")
        self.assertEqual(len(state['subdir']['0']), 0, "Subdirectory count for version 0")
        self.assertEqual(len(state['subdir']['1']), 0, "Subdirectory count for version 1")
        self.assertEqual(state['metadata']['createdby'], RDFDatabankConfig.endpointuser, "Created by")
        # default embargo runs 70 years from now
        d = (datetime.now() + relativedelta(years=+70)).isoformat()
        d = d.split('T')[0]
        self.assertEqual(state['metadata']['embargoed'], True, "Embargoed?")
        self.assertTrue(d in state['metadata']['embargoed_until'], "embargoed_until %s?"%d)
        self.failUnless('Created new data package' in state['versionlog']['0'], "Version 0 log")
        self.failUnless('Added or updated file images.zip' in state['versionlog']['1'], "Version 1 log")
        self.assertEqual(len(parts.keys()), 4, "Parts")
        self.assertEqual(len(parts['4=%s'%dataset_id].keys()), 13, "File stats for 4=%s"%dataset_id)
        self.assertEqual(len(parts['manifest.rdf'].keys()), 13, "File stats for manifest.rdf")
        self.assertEqual(len(parts['images.zip'].keys()), 13, "File stats for images.zip")
def testN3Store(store="default", configString=None):
    # Exercise an rdflib store backend's support for N3 formulae
    # (quoted graphs), variables, and context operations.
    #
    # store        -- rdflib store plugin name (default: in-memory "default")
    # configString -- backend configuration; when given, the store is
    #                 destroyed and re-opened so each run starts clean
    g = ConjunctiveGraph(store=store)
    if configString:
        g.destroy(configString)
        g.open(configString)
    g.parse(StringInputSource(testN3), format="n3")
    print g.store
    try:
        # testN3 contains exactly one log:implies statement whose subject
        # and object are quoted graphs; capture them for the checks below.
        for s, p, o in g.triples((None, implies, None)):
            formulaA = s
            formulaB = o
        assert type(formulaA) == QuotedGraph and type(formulaB) == QuotedGraph
        a = URIRef('http://test/a')
        b = URIRef('http://test/b')
        c = URIRef('http://test/c')
        d = URIRef('http://test/d')
        v = Variable('y')
        # View of the whole store, spanning all contexts.
        universe = ConjunctiveGraph(g.store)
        #test formula as terms
        assert len(list(universe.triples((formulaA, implies, formulaB)))) == 1
        #test variable as term and variable roundtrip
        assert len(list(formulaB.triples((None, None, v)))) == 1
        for s, p, o in formulaB.triples((None, d, None)):
            if o != c:
                assert isinstance(o, Variable)
                assert o == v
        s = list(universe.subjects(RDF.type, RDFS.Class))[0]
        assert isinstance(s, BNode)
        # Triple counts: formulae contents are not part of the default graph.
        assert len(list(universe.triples((None, implies, None)))) == 1
        assert len(list(universe.triples((None, RDF.type, None)))) == 1
        assert len(list(formulaA.triples((None, RDF.type, None)))) == 1
        assert len(list(formulaA.triples((None, None, None)))) == 2
        assert len(list(formulaB.triples((None, None, None)))) == 2
        assert len(list(universe.triples((None, None, None)))) == 3
        assert len(
            list(formulaB.triples((None, URIRef('http://test/d'), None)))) == 2
        assert len(
            list(universe.triples((None, URIRef('http://test/d'), None)))) == 1
        #context tests
        #test contexts with triple argument
        assert len(list(universe.contexts((a, d, c)))) == 1
        #Remove test cases
        universe.remove((None, implies, None))
        assert len(list(universe.triples((None, implies, None)))) == 0
        # Removing from the universe must not touch formula contents.
        assert len(list(formulaA.triples((None, None, None)))) == 2
        assert len(list(formulaB.triples((None, None, None)))) == 2
        formulaA.remove((None, b, None))
        assert len(list(formulaA.triples((None, None, None)))) == 1
        formulaA.remove((None, RDF.type, None))
        assert len(list(formulaA.triples((None, None, None)))) == 0
        universe.remove((None, RDF.type, RDFS.Class))
        #remove_context tests
        universe.remove_context(formulaB)
        assert len(list(universe.triples((None, RDF.type, None)))) == 0
        assert len(universe) == 1
        assert len(formulaB) == 0
        universe.remove((None, None, None))
        assert len(universe) == 0
        g.store.destroy(configString)
    except:
        # Tear the backend down even on failure, then re-raise so the
        # original assertion error is not masked.
        g.store.destroy(configString)
        raise
def _add_format_description(parent, about_uri, mime_type, label):
    # Helper: append one dcterms:hasFormat > dctype:Text > dc:format >
    # dcterms:IMT description to `parent` (rdf:value = MIME type,
    # rdfs:label = human-readable name, xml:lang="en").
    has_format = ET.SubElement(parent, "{http://purl.org/dc/terms/}hasFormat")
    text_el = ET.SubElement(
        has_format,
        "{http://purl.org/dc/dcmitype/}Text",
        attrib={
            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about": about_uri
        })
    format_el = ET.SubElement(text_el,
                              "{http://purl.org/dc/elements/1.1/}format")
    imt_el = ET.SubElement(format_el, "{http://purl.org/dc/terms/}IMT")
    value_el = ET.SubElement(
        imt_el, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
    value_el.text = mime_type
    label_el = ET.SubElement(
        imt_el,
        "{http://www.w3.org/2000/01/rdf-schema#}label",
        attrib={"{http://www.w3.org/XML/1998/namespace}lang": "en"})
    label_el.text = label


def update_rdf_for_conversion(vocabprefix, vocab_properties, rdf_vocab_properties):
    """Prepare a vocabulary's RDF/XML file for HTML conversion.

    Ensures the owl:Ontology element carries dc:identifier,
    dcterms:isVersionOf, vann:preferredNamespacePrefix/Uri and
    dcterms:hasFormat descriptions for both the HTML and RDF renderings,
    adds rdfs:isDefinedBy to every class/property term, and writes the
    result to a new "<name>_modified.rdf" file.

    Returns (newrdf_vocab_properties, html_vocab_properties), each a dict
    with 'format', 'name', 'path' and 'uri' keys derived from
    rdf_vocab_properties.
    """
    html_vocab_properties = {}
    html_vocab_properties['format'] = 'text/html'
    html_vocab_properties['name'] = "%s.html" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    html_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])
    html_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])
    newrdf_vocab_properties = {}
    newrdf_vocab_properties['format'] = 'application/rdf+xml'
    newrdf_vocab_properties['name'] = "%s_modified.rdf" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    newrdf_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    newrdf_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    # Parse once with rdflib only to harvest the namespace prefixes so the
    # ElementTree serializer reuses them instead of ns0, ns1, ...
    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
        ET._namespace_map[str(nsurl)] = str(nsprefix)
    for prefix, url in namespaces.iteritems():
        if not str(url) in graph_ns:
            ET._namespace_map[str(url)] = str(prefix)
    # Tags (lower-cased) whose elements count as vocabulary terms and must
    # carry rdfs:isDefinedBy.
    def_tags = [
        "{http://www.w3.org/2000/01/rdf-schema#}Class".lower(),
        "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Property".lower(),
        "{http://www.w3.org/2002/07/owl#}ObjectProperty".lower(),
        "{http://www.w3.org/2002/07/owl#}DatatypeProperty".lower(),
        "{http://www.w3.org/2002/07/owl#}Class".lower(),
    ]
    tree = ET.ElementTree(file=rdf_vocab_properties['path'])
    ns_uri = vocab_properties['preferredNamespaceUri']
    html_uri = html_vocab_properties['uri']
    rdf_uri = rdf_vocab_properties['uri']
    tree_root = tree.getroot()
    vocab = tree_root.find("{http://www.w3.org/2002/07/owl#}Ontology")
    # BUGFIX: compare against None explicitly -- an Element with no
    # children is falsy, so `if vocab:` would wrongly treat an existing
    # empty owl:Ontology element as missing and create a duplicate.
    if vocab is not None:
        if not vocab.findall("{http://purl.org/dc/elements/1.1/}identifier"):
            se0 = ET.SubElement(
                vocab, "{http://purl.org/dc/elements/1.1/}identifier")
            se0.text = rdf_uri
        if not vocab.findall("{http://purl.org/dc/terms/}isVersionOf"):
            ET.SubElement(
                vocab,
                "{http://purl.org/dc/terms/}isVersionOf",
                attrib={
                    "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":
                    ns_uri
                })
        if not vocab.findall(
                "{http://purl.org/vocab/vann/}preferredNamespacePrefix"):
            se2a = ET.SubElement(
                vocab, "{http://purl.org/vocab/vann/}preferredNamespacePrefix")
            se2a.text = vocab_properties['preferredNamespacePrefix']
        if not vocab.findall(
                "{http://purl.org/vocab/vann/}preferredNamespaceUri"):
            se2b = ET.SubElement(
                vocab, "{http://purl.org/vocab/vann/}preferredNamespaceUri")
            se2b.text = vocab_properties['preferredNamespaceUri']
        if not vocab.findall("{http://purl.org/dc/terms/}hasFormat"):
            # No hasFormat descriptions at all: add both renderings.
            _add_format_description(vocab, html_uri, 'text/html', 'HTML')
            _add_format_description(vocab, rdf_uri, 'application/rdf+xml',
                                    'RDF')
        else:
            # Check which renderings are already described, add the rest.
            formats = vocab.findall("{http://purl.org/dc/terms/}hasFormat")
            available_types = []
            for f in formats:
                type_tags = f.findall(
                    ".//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
                for type_tag in type_tags:
                    # BUGFIX: reset per tag -- previously `ftype` could be
                    # unbound (NameError) on the first pass or carry a stale
                    # value from an earlier iteration.
                    ftype = None
                    if type_tag.attrib.get(
                            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource"
                    ):
                        ftype = type_tag.attrib.get(
                            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource"
                        )
                    elif type_tag.text:
                        ftype = type_tag.text
                    if ftype and 'html' in ftype.lower():
                        available_types.append('html')
                    elif ftype and 'rdf' in ftype.lower():
                        available_types.append('rdf')
            if not 'html' in available_types:
                _add_format_description(vocab, html_uri, 'text/html', 'HTML')
            if not 'rdf' in available_types:
                _add_format_description(vocab, rdf_uri,
                                        'application/rdf+xml', 'RDF')
    else:
        # No owl:Ontology element: create one with the full set of
        # properties and both format descriptions.
        vocab = ET.SubElement(
            tree_root,
            "{http://www.w3.org/2002/07/owl#}Ontology",
            attrib={
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about": ns_uri
            })
        se0 = ET.SubElement(vocab,
                            "{http://purl.org/dc/elements/1.1/}identifier")
        se0.text = rdf_uri
        ET.SubElement(
            vocab,
            "{http://purl.org/dc/terms/}isVersionOf",
            attrib={
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":
                ns_uri
            })
        se2a = ET.SubElement(
            vocab, "{http://purl.org/vocab/vann/}preferredNamespacePrefix")
        se2a.text = vocab_properties['preferredNamespacePrefix']
        se2b = ET.SubElement(
            vocab, "{http://purl.org/vocab/vann/}preferredNamespaceUri")
        se2b.text = vocab_properties['preferredNamespaceUri']
        _add_format_description(vocab, html_uri, 'text/html', 'HTML')
        _add_format_description(vocab, rdf_uri, 'application/rdf+xml', 'RDF')
    # Stamp rdfs:isDefinedBy on every class / property / term element.
    terms = tree_root.getiterator()
    for term in terms:
        if term.tag.lower().strip() in def_tags:
            defby = term.find(
                "{http://www.w3.org/2000/01/rdf-schema#}isDefinedBy")
            # BUGFIX: `if not defby:` would also fire for an existing but
            # childless isDefinedBy element (Element truthiness).
            if defby is None:
                ET.SubElement(
                    term,
                    "{http://www.w3.org/2000/01/rdf-schema#}isDefinedBy",
                    attrib={
                        "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":
                        ns_uri
                    })
    # Move ontology to the first element.
    # NOTE(review): remove() only works when vocab is a direct child of the
    # root -- assumed true for these RDF/XML files; confirm for nested cases.
    tree_root.remove(vocab)
    tree_root.insert(0, vocab)
    tree.write(newrdf_vocab_properties['path'])
    return (newrdf_vocab_properties, html_vocab_properties)
def schemadoc(uris):
    # Load one or more RDF schemas and print a self-contained XHTML
    # documentation page (classes, then properties) to stdout.
    #
    # uris -- iterable of schema URIs/paths parsable by rdflib
    G = Graph()
    for uri in uris:
        G.parse(uri)
    print """
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Schema Documentation</title>
<style type="text/css">
body { margin: 1em; font-family: Georgia, sans-serif; }
h1 { font-family: Tahoma, sans-serif; }
h2, h3, h4, h5, h6 { font-family: Arial, sans-serif; }
a { font-weight: bold; color: #036; }
dt.class { margin-top: 0.75em; }
dt.property { margin-top: 0.75em; }
address { padding-top: 0.35em; border-top: 2px solid #369; }
</style>
</head>
<body>
<h1>Schema Documentation</h1>
"""
    classes = []
    # Collect every rdfs:Class / owl:Class subject (URIRefs only; blank
    # nodes are skipped).
    for metaclass in [RDFS.Class, OWL.Class]:
        for uri in G.subjects(RDF.type, metaclass):
            if not isinstance(uri, URIRef):
                continue
            c = Class(uri)
            # Direct superclasses, as project Class wrappers.
            c.classes = [
                Class(u) for u in G.objects(uri, RDFS.subClassOf)
                if isinstance(u, URIRef)
            ]
            # Properties whose rdfs:domain is this class, with their ranges.
            for prop in G.subjects(RDFS.domain, uri):
                p = Property(prop)
                ranges = [Class(u) for u in G.objects(prop, RDFS.range)]
                c.properties.append((p, ranges))
            # c.properties = [Property(u) for u in G.subjects(RDFS.domain, uri)]
            c.comments = [str(s) for s in G.objects(uri, RDFS.comment)]
            classes.append(c)
    print '<h2>Classes</h2>'
    print '<ul>'
    for c in sorted(classes):
        print '<li>'
        print '<dl>'
        print '<dt class="class">'
        # sys.stdout.write avoids the trailing newline/space print adds,
        # so the superclass list hugs the class name.
        sys.stdout.write(c.name())
        if c.classes:
            o = ', '.join(cls.name(format='text') for cls in sorted(c.classes))
            print '(' + o + ')'
        else:
            print
        print '</dt>'
        for comment in c.comments:
            print '<dd>'
            print comment
            print '</dd>'
        for prop, ranges in sorted(c.properties):
            print '<dd>'
            print ' ' + prop.name()
            if ranges:
                print ' => ' + ', '.join(range.name() for range in ranges)
            print '</dd>'
        print '</dt>'
        print '</li>'
    print '</ul>'
    print '<h2>Properties</h2>'
    properties = []
    print '<dl>'
    # Collect plain, functional and inverse-functional properties.
    for propclass in [
            RDF.Property, OWL.FunctionalProperty, OWL.InverseFunctionalProperty
    ]:
        for uri in G.subjects(RDF.type, propclass):
            if not isinstance(uri, URIRef):
                continue
            p = Property(uri)
            properties.append(p)
            p.kind = Class(propclass)
            p.domains = [
                Class(u) for u in G.objects(uri, RDFS.domain)
                if isinstance(u, URIRef)
            ]
            p.ranges = [
                Class(u) for u in G.objects(uri, RDFS.range)
                if isinstance(u, URIRef)
            ]
            p.comments = [str(s) for s in G.objects(uri, RDFS.comment)]
    for p in sorted(properties):
        print '<dt class="property">'
        print p.name() + ' (' + p.kind.name(format='text') + ')'
        print '</dt>'
        for comment in p.comments:
            print '<dd>'
            print comment
            print '</dd>'
        if p.domains:
            print '<dd>domain: '
            print ', '.join(domain.name() for domain in p.domains)
            print '</dd>'
        if p.ranges:
            print '<dd>range: '
            print ', '.join(range.name() for range in p.ranges)
            print '</dd>'
    print '</dl>'
    print '<address>'
    print 'Generated by <a href="http://inamidst.com/proj/sdoc/"'
    print '>Schemadoc</a>'
    print '</address>'
    print '</body>'
    print '</html>'
def main(): """Uploads many RDF graphs into storage.""" uris = [ 'http://tech.slashdot.org/story/09/11/02/0734253/Transpacific-Unity-Fiber-Optic-Cable-Leaves-Japan', 'http://games.slashdot.org/story/09/10/29/2026217/Nintendo-Announces-DSi-XL', 'http://games.slashdot.org/story/09/10/28/1634218/Leaked-Modern-Warfare-2-Footage-Causes-Outrage', 'http://games.slashdot.org/story/09/11/01/1421253/Scams-and-Social-Gaming', 'http://apple.slashdot.org/story/09/10/26/2256212/Psystars-Rebel-EFI-Hackintosh-Tool-Reviewed-Found-Wanting', 'http://yro.slashdot.org/story/09/11/03/0331227/Feds-Bust-Cable-Modem-Hacker', 'http://hardware.slashdot.org/story/09/11/02/2048234/Europe-Launches-Flood-Predicting-Satellite-and-Test-Probe', 'http://yro.slashdot.org/story/09/11/02/1411252/An-Inbox-Is-Not-a-Glove-Compartment', 'http://science.slashdot.org/story/09/11/03/1554207/2-Companies-Win-NASAs-Moon-Landing-Prize-Money', 'http://news.slashdot.org/story/09/11/03/1751232/Rise-of-the-Robot-Squadrons', 'http://it.slashdot.org/story/09/10/21/2120251/Some-Users-Say-Win7-Wants-To-Remove-iTunes-Google-Toolbar', 'http://apple.slashdot.org/story/09/10/23/1456221/Apple-Seeks-Patent-On-Operating-System-Advertising', 'http://games.slashdot.org/story/09/10/29/0225250/Physics-Rebel-Aims-To-Shake-Up-the-Video-Game-World', 'http://games.slashdot.org/story/09/10/28/030237/2D-Boy-Posts-Pay-What-You-Want-Final-Wrap-up', 'http://it.slashdot.org/story/09/11/02/1622218/IT-Snake-Oil-mdash-Six-Tech-Cure-Alls-That-Went-Bunk', 'http://apple.slashdot.org/story/09/10/20/1833228/Apple-Blurs-the-Server-Line-With-Mac-Mini-Server', 'http://games.slashdot.org/story/09/11/02/1530221/Free-3G-Wireless-For-Nintendos-Next-Handheld', 'http://hardware.slashdot.org/story/09/11/03/1530258/Dell-Rugged-Laptops-Not-Quite-Tough-Enough', 'http://linux.slashdot.org/story/09/11/03/2211231/Some-Early-Adopters-Stung-By-Ubuntus-Karmic-Koala', 'http://hardware.slashdot.org/story/09/10/31/0120223/Contest-To-Hack-Brazilian-Voting-Machines', 
'http://ask.slashdot.org/story/09/10/25/1615203/Low-Power-Home-Linux-Server', 'http://games.slashdot.org/story/09/10/30/0149253/FCC-Mulling-More-Control-For-Electronic-Media', 'http://mobile.slashdot.org/story/09/11/03/1649246/Unfinished-Windows-7-Hotspot-Feature-Exploited', 'http://games.slashdot.org/story/09/10/30/2040230/Nokias-N-Gage-Service-To-End-After-2010', 'http://linux.slashdot.org/story/09/10/29/128205/Ubuntu-910-Officially-Released', 'http://ask.slashdot.org/story/09/10/30/2126252/Installing-Linux-On-Old-Hardware', 'http://games.slashdot.org/story/09/10/31/1428225/Controlling-Games-and-Apps-Through-Muscle-Sensors', 'http://tech.slashdot.org/story/09/11/01/2131249/uTorrent-To-Build-In-Transfer-Throttling-Ability', 'http://news.slashdot.org/story/09/11/02/2342258/Microsoft-Links-Malware-Rates-To-Pirated-Windows', 'http://apple.slashdot.org/story/09/10/29/0311214/Speech-to-Speech-Translator-Developed-For-iPhone', 'http://games.slashdot.org/story/09/10/30/022242/DampD-On-Google-Wave', 'http://science.slashdot.org/story/09/11/02/1435227/Bacteria-Could-Survive-In-Martian-Soil', 'http://apple.slashdot.org/story/09/11/02/0853219/For-September-Book-Related-Apps-Overtook-Games-On-iPhone', 'http://hardware.slashdot.org/story/09/11/03/1427210/Negroponte-Hints-At-Paper-Like-Design-For-XO-3', 'http://science.slashdot.org/story/09/11/03/0313242/Giant-Rift-In-Africa-Will-Create-a-New-Ocean', 'http://yro.slashdot.org/story/09/11/02/132211/Attorney-General-Says-Wiretap-Lawsuit-Must-Be-Thrown-Out', 'http://linux.slashdot.org/story/09/10/25/0450232/Ryan-Gordon-Wants-To-Bring-Universal-Binaries-To-Linux', 'http://science.slashdot.org/story/09/11/01/2145208/Computer-Activities-for-Those-With-Speech-and-Language-Difficulties', 'http://science.slashdot.org/story/09/11/03/1842247/The-Tech-Aboard-the-International-Space-Station', 'http://science.slashdot.org/story/09/11/03/1450211/Scientists-Build-a-Smarter-Rat', 
'http://yro.slashdot.org/story/09/11/03/2023209/Spring-Design-Sues-Barnes-amp-Noble-Over-Nook-IP', 'http://apple.slashdot.org/story/09/11/01/195232/Apple-Says-Booting-OS-X-Makes-an-Unauthorized-Copy', 'http://yro.slashdot.org/story/09/10/22/1541220/Nokia-Sues-Apple-For-Patent-Infringement-In-iPhone', 'http://linux.slashdot.org/story/09/10/23/1639234/Ubuntu-Karmic-Koala-RC-Hits-the-Streets-With-Windows-7', 'http://linux.slashdot.org/story/09/10/27/1335227/Comparing-the-Freedoms-Offered-By-Maemo-and-Android' ] i = 0 for uri in uris: # Get the RDF wrdf = Web2Rdf(uri) rdf = wrdf.getRdf() if not rdf: print "No RDF returned!" return False print "Got RDF..." rdf = rdfString(rdf) # Open Storage print "Opening store..." #params = "./newdatabase.sqlite" # SQLITE params = ('localhost', 'tuser', 'tuser', 'rdf') # MYSQL rstore = RdfStore(params) rstore.open() print "Storing..." graph = Graph(rstore.get(), identifier = URIRef("http://slashdot/")) #graph.parse("example.rdf") graph.parse(rdf, publicID=uri) graph.commit() i+=1 print "%d of %d uris complete." % (i, len(uris))
def testN3Store(store="default", configString=None):
    # NOTE(review): this is a duplicate of an earlier testN3Store in this
    # file and shadows that definition at import time -- consider removing
    # one of the two copies.
    #
    # Exercises formula (quoted-graph) support, variables and context
    # operations of the given rdflib store backend.
    g = ConjunctiveGraph(store=store)
    if configString:
        # Recreate the persistent backend so the run starts clean.
        g.destroy(configString)
        g.open(configString)
    g.parse(StringInputSource(testN3), format="n3")
    print g.store
    try:
        # Capture the two quoted graphs linked by the single log:implies
        # statement in testN3.
        for s,p,o in g.triples((None,implies,None)):
            formulaA = s
            formulaB = o
        assert type(formulaA)==QuotedGraph and type(formulaB)==QuotedGraph
        a = URIRef('http://test/a')
        b = URIRef('http://test/b')
        c = URIRef('http://test/c')
        d = URIRef('http://test/d')
        v = Variable('y')
        # View spanning all contexts of the store.
        universe = ConjunctiveGraph(g.store)
        #test formula as terms
        assert len(list(universe.triples((formulaA,implies,formulaB))))==1
        #test variable as term and variable roundtrip
        assert len(list(formulaB.triples((None,None,v))))==1
        for s,p,o in formulaB.triples((None,d,None)):
            if o != c:
                assert isinstance(o,Variable)
                assert o == v
        s = list(universe.subjects(RDF.type, RDFS.Class))[0]
        assert isinstance(s,BNode)
        # Formula contents must not leak into the default graph counts.
        assert len(list(universe.triples((None,implies,None)))) == 1
        assert len(list(universe.triples((None,RDF.type,None)))) ==1
        assert len(list(formulaA.triples((None,RDF.type,None))))==1
        assert len(list(formulaA.triples((None,None,None))))==2
        assert len(list(formulaB.triples((None,None,None))))==2
        assert len(list(universe.triples((None,None,None))))==3
        assert len(list(formulaB.triples((None,URIRef('http://test/d'),None))))==2
        assert len(list(universe.triples((None,URIRef('http://test/d'),None))))==1
        #context tests
        #test contexts with triple argument
        assert len(list(universe.contexts((a,d,c))))==1
        #Remove test cases
        universe.remove((None,implies,None))
        assert len(list(universe.triples((None,implies,None))))==0
        # Removal from the universe must leave formula contents intact.
        assert len(list(formulaA.triples((None,None,None))))==2
        assert len(list(formulaB.triples((None,None,None))))==2
        formulaA.remove((None,b,None))
        assert len(list(formulaA.triples((None,None,None))))==1
        formulaA.remove((None,RDF.type,None))
        assert len(list(formulaA.triples((None,None,None))))==0
        universe.remove((None,RDF.type,RDFS.Class))
        #remove_context tests
        universe.remove_context(formulaB)
        assert len(list(universe.triples((None,RDF.type,None))))==0
        assert len(universe)==1
        assert len(formulaB)==0
        universe.remove((None,None,None))
        assert len(universe)==0
        g.store.destroy(configString)
    except:
        # Destroy the backend even on failure, then re-raise the original
        # exception.
        g.store.destroy(configString)
        raise
def testParse(self): g = ConjunctiveGraph() g.parse( "http://groups.csail.mit.edu/dig/2005/09/rein/examples/troop42-policy.n3", format="n3")
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Augment a vocabulary's RDF with conversion metadata (rdflib variant).

    Adds identifier / version / format / preferred-namespace statements to
    the owl:Ontology subject, binds any missing well-known namespace
    prefixes, stamps rdfs:isDefinedBy on every term returned by
    get_terms(), and serializes the result to "<name>_modified.rdf".

    Returns (newrdf_vocab_properties, html_vocab_properties), each a dict
    with 'format', 'name', 'path' and 'uri' keys.
    """
    #(id, base, prefix) = get_vocab_base(vocabfile)
    html_vocab_properties = {}
    html_vocab_properties['format'] = 'text/html'
    html_vocab_properties['name'] = "%s.html" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    html_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])
    html_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])
    newrdf_vocab_properties = {}
    newrdf_vocab_properties['format'] = 'application/rdf+xml'
    newrdf_vocab_properties['name'] = "%s_modified.rdf" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    newrdf_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    newrdf_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])
    # Locate the ontology subject (last owl:Ontology wins, as before).
    # NOTE(review): if the file declares no owl:Ontology, subject stays
    # None and the graph.add() calls below will fail -- confirm callers
    # always pass files with one.
    subject = None
    for s in graph.subjects(namespaces['rdf']['type'],
                            URIRef(namespaces['owl']['Ontology'])):
        subject = s
    # Bind any well-known prefixes the parsed graph does not already use.
    # FIX: loop variables renamed so they no longer shadow the `prefix`
    # function parameter.
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
    for known_prefix, known_url in namespaces.iteritems():
        if not str(known_url) in graph_ns:
            graph.bind(known_prefix, URIRef(known_url))
    formatNode1 = BNode()
    formatNode2 = BNode()
    #Add vocabulary properties identifier and format
    graph.add((subject, namespaces['dc']['identifier'],
               URIRef(rdf_vocab_properties['uri'])))
    graph.add((subject, namespaces['dcterms']['isVersionOf'],
               URIRef(vocab_properties['preferredNamespaceUri'])))
    graph.add((subject, namespaces['dcterms']['hasFormat'],
               URIRef(rdf_vocab_properties['uri'])))
    graph.add((subject, namespaces['dcterms']['hasFormat'],
               URIRef(html_vocab_properties['uri'])))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'],
               URIRef(vocab_properties['preferredNamespaceUri'])))
    # NOTE(review): a namespace prefix is a plain string, so a Literal
    # would be more correct here than URIRef -- kept as-is to preserve
    # the serialized output; confirm before changing.
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'],
               URIRef(vocab_properties['preferredNamespacePrefix'])))
    # Describe the HTML rendering (dctype:Text with an IMT format node).
    graph.add((URIRef(html_vocab_properties['uri']),
               namespaces['rdf']['type'],
               URIRef(namespaces['dctype']['Text'])))
    graph.add((URIRef(html_vocab_properties['uri']),
               namespaces['dc']['format'], formatNode1))
    graph.add((formatNode1, namespaces['rdf']['value'],
               Literal('text/html')))
    graph.add((formatNode1, namespaces['rdfs']['label'], Literal('HTML')))
    graph.add((formatNode1, namespaces['rdf']['type'],
               URIRef(namespaces['dcterms']['IMT'])))
    # Describe the RDF rendering likewise.
    graph.add((URIRef(rdf_vocab_properties['uri']),
               namespaces['rdf']['type'],
               URIRef(namespaces['dctype']['Text'])))
    graph.add((URIRef(rdf_vocab_properties['uri']),
               namespaces['dc']['format'], formatNode2))
    graph.add((formatNode2, namespaces['rdf']['value'],
               Literal('application/rdf+xml')))
    graph.add((formatNode2, namespaces['rdfs']['label'], Literal('RDF')))
    graph.add((formatNode2, namespaces['rdf']['type'],
               URIRef(namespaces['dcterms']['IMT'])))
    # Add rdfs:isDefinedBy for each class / property / term of the
    # vocabulary. (FIX: this comment previously continued onto a line
    # without a '#', which was a syntax error; an older rdf:type-based
    # term-detection approach was removed in favor of get_terms().)
    list_of_terms = get_terms(rdf_vocab_properties['path'])
    for s in list_of_terms:
        graph.add((URIRef(s), namespaces['rdfs']['isDefinedBy'],
                   URIRef(vocab_properties['preferredNamespaceUri'])))
    rdf_str = graph.serialize(format="pretty-xml")
    #f = codecs.open(newrdf_vocab_properties['path'], 'w', 'utf-8')
    f = codecs.open(newrdf_vocab_properties['path'], 'w')
    f.write(rdf_str)
    f.close()
    return (newrdf_vocab_properties, html_vocab_properties)
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Augment a vocabulary's RDF with conversion metadata.

    NOTE(review): duplicate of an identical update_rdf_for_conversion
    earlier in this file -- the later definition shadows the earlier one;
    consider deleting one copy.

    Adds identifier / version / format / preferred-namespace statements to
    the owl:Ontology subject, binds missing well-known namespace prefixes,
    stamps rdfs:isDefinedBy on every term from get_terms(), and writes
    "<name>_modified.rdf". Returns (newrdf_vocab_properties,
    html_vocab_properties).
    """
    #(id, base, prefix) = get_vocab_base(vocabfile)
    html_vocab_properties = {}
    html_vocab_properties['format'] = 'text/html'
    html_vocab_properties['name'] = "%s.html" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    html_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])
    html_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])
    newrdf_vocab_properties = {}
    newrdf_vocab_properties['format'] = 'application/rdf+xml'
    newrdf_vocab_properties['name'] = "%s_modified.rdf" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    newrdf_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    newrdf_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])
    # Locate the ontology subject (last owl:Ontology wins, as before).
    # NOTE(review): stays None when no owl:Ontology exists; the graph.add()
    # calls below would then fail -- confirm inputs always declare one.
    subject = None
    for s in graph.subjects(namespaces['rdf']['type'],
                            URIRef(namespaces['owl']['Ontology'])):
        subject = s
    # Bind well-known prefixes not already present in the parsed graph.
    # FIX: loop variables renamed so they no longer shadow the `prefix`
    # function parameter.
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
    for known_prefix, known_url in namespaces.iteritems():
        if not str(known_url) in graph_ns:
            graph.bind(known_prefix, URIRef(known_url))
    formatNode1 = BNode()
    formatNode2 = BNode()
    #Add vocabulary properties identifier and format
    graph.add((subject, namespaces['dc']['identifier'],
               URIRef(rdf_vocab_properties['uri'])))
    graph.add((subject, namespaces['dcterms']['isVersionOf'],
               URIRef(vocab_properties['preferredNamespaceUri'])))
    graph.add((subject, namespaces['dcterms']['hasFormat'],
               URIRef(rdf_vocab_properties['uri'])))
    graph.add((subject, namespaces['dcterms']['hasFormat'],
               URIRef(html_vocab_properties['uri'])))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'],
               URIRef(vocab_properties['preferredNamespaceUri'])))
    # NOTE(review): a prefix is a plain string -- Literal would be more
    # correct than URIRef; kept as-is to preserve serialized output.
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'],
               URIRef(vocab_properties['preferredNamespacePrefix'])))
    # HTML rendering description (dctype:Text with an IMT format node).
    graph.add((URIRef(html_vocab_properties['uri']),
               namespaces['rdf']['type'],
               URIRef(namespaces['dctype']['Text'])))
    graph.add((URIRef(html_vocab_properties['uri']),
               namespaces['dc']['format'], formatNode1))
    graph.add((formatNode1, namespaces['rdf']['value'],
               Literal('text/html')))
    graph.add((formatNode1, namespaces['rdfs']['label'], Literal('HTML')))
    graph.add((formatNode1, namespaces['rdf']['type'],
               URIRef(namespaces['dcterms']['IMT'])))
    # RDF rendering description.
    graph.add((URIRef(rdf_vocab_properties['uri']),
               namespaces['rdf']['type'],
               URIRef(namespaces['dctype']['Text'])))
    graph.add((URIRef(rdf_vocab_properties['uri']),
               namespaces['dc']['format'], formatNode2))
    graph.add((formatNode2, namespaces['rdf']['value'],
               Literal('application/rdf+xml')))
    graph.add((formatNode2, namespaces['rdfs']['label'], Literal('RDF')))
    graph.add((formatNode2, namespaces['rdf']['type'],
               URIRef(namespaces['dcterms']['IMT'])))
    # Add rdfs:isDefinedBy for each class / property / term of the
    # vocabulary. (FIX: this comment previously continued onto a line
    # without a '#', which was a syntax error; the old rdf:type-based
    # term detection was removed in favor of get_terms().)
    list_of_terms = get_terms(rdf_vocab_properties['path'])
    for s in list_of_terms:
        graph.add((URIRef(s), namespaces['rdfs']['isDefinedBy'],
                   URIRef(vocab_properties['preferredNamespaceUri'])))
    rdf_str = graph.serialize(format="pretty-xml")
    #f = codecs.open(newrdf_vocab_properties['path'], 'w', 'utf-8')
    f = codecs.open(newrdf_vocab_properties['path'], 'w')
    f.write(rdf_str)
    f.close()
    return (newrdf_vocab_properties, html_vocab_properties)
def readSubgraphXML(filename): if SUBGRAPH_FORMAT == 'xml': return open(filename).read() g = ConjunctiveGraph() g.parse(filename, format=SUBGRAPH_FORMAT) return g
def update_rdf_for_conversion(vocabprefix, vocab_properties, rdf_vocab_properties):
    """Prepare a vocabulary's RDF/XML file for HTML conversion.

    Derives names / paths / URIs for the HTML rendition and for a modified
    copy of the RDF file, then rewrites the RDF/XML so that the owl:Ontology
    element carries dc:identifier, dcterms:isVersionOf,
    vann:preferredNamespacePrefix / preferredNamespaceUri and
    dcterms:hasFormat descriptions of both renditions, and every class /
    property term carries rdfs:isDefinedBy.

    :param vocabprefix: vocabulary prefix (unused here; kept for API compatibility)
    :param vocab_properties: dict with 'preferredNamespaceUri' and
        'preferredNamespacePrefix'
    :param rdf_vocab_properties: dict with 'name', 'path' and 'uri' of the
        source RDF file
    :return: tuple (newrdf_vocab_properties, html_vocab_properties)
    """
    # Namespace prefixes for ElementTree's Clark-notation tag names.
    RDF = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}"
    RDFS = "{http://www.w3.org/2000/01/rdf-schema#}"
    OWL = "{http://www.w3.org/2002/07/owl#}"
    DC = "{http://purl.org/dc/elements/1.1/}"
    DCTERMS = "{http://purl.org/dc/terms/}"
    DCTYPE = "{http://purl.org/dc/dcmitype/}"
    VANN = "{http://purl.org/vocab/vann/}"
    XMLNS = "{http://www.w3.org/XML/1998/namespace}"

    def _add_format(parent, uri, mime, label):
        """Append one dcterms:hasFormat subtree describing a rendition.

        Structure: hasFormat > dcmitype:Text(rdf:about=uri) > dc:format >
        dcterms:IMT > (rdf:value=mime, rdfs:label=label).  This subtree was
        duplicated four times in the original code.
        """
        has_format = ET.SubElement(parent, DCTERMS + "hasFormat")
        text_el = ET.SubElement(has_format, DCTYPE + "Text", attrib={RDF + "about": uri})
        fmt_el = ET.SubElement(text_el, DC + "format")
        imt_el = ET.SubElement(fmt_el, DCTERMS + "IMT")
        value_el = ET.SubElement(imt_el, RDF + "value")
        value_el.text = mime
        label_el = ET.SubElement(imt_el, RDFS + "label", attrib={XMLNS + "lang": "en"})
        label_el.text = label

    # Describe the HTML rendition derived from the RDF file name.
    html_vocab_properties = {}
    html_vocab_properties['format'] = 'text/html'
    html_vocab_properties['name'] = "%s.html" % os.path.splitext(rdf_vocab_properties['name'])[0]
    html_vocab_properties['path'] = rdf_vocab_properties['path'].replace(rdf_vocab_properties['name'], html_vocab_properties['name'])
    html_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(rdf_vocab_properties['name'], html_vocab_properties['name'])

    # Describe the modified RDF file this function writes.
    newrdf_vocab_properties = {}
    newrdf_vocab_properties['format'] = 'application/rdf+xml'
    newrdf_vocab_properties['name'] = "%s_modified.rdf" % os.path.splitext(rdf_vocab_properties['name'])[0]
    newrdf_vocab_properties['path'] = rdf_vocab_properties['path'].replace(rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    newrdf_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(rdf_vocab_properties['name'], newrdf_vocab_properties['name'])

    # Register namespace prefixes (file's own first, then module defaults)
    # so the serialized output keeps readable qnames.
    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
        ET._namespace_map[str(nsurl)] = str(nsprefix)
    for prefix, url in namespaces.iteritems():
        if not str(url) in graph_ns:
            ET._namespace_map[str(url)] = str(prefix)

    # Lower-cased tags identifying vocabulary terms that need rdfs:isDefinedBy.
    def_tags = [
        (RDFS + "Class").lower(),
        (RDF + "Property").lower(),
        (OWL + "ObjectProperty").lower(),
        (OWL + "DatatypeProperty").lower(),
        (OWL + "Class").lower(),
    ]

    tree = ET.ElementTree(file=rdf_vocab_properties['path'])
    ns_uri = vocab_properties['preferredNamespaceUri']
    html_uri = html_vocab_properties['uri']
    rdf_uri = rdf_vocab_properties['uri']
    tree_root = tree.getroot()
    vocab = tree_root.find(OWL + "Ontology")
    # BUG FIX: an Element with no children is falsy, so the original
    # 'if vocab:' wrongly treated an empty owl:Ontology as missing.
    if vocab is not None:
        if not vocab.findall(DC + "identifier"):
            se0 = ET.SubElement(vocab, DC + "identifier")
            se0.text = rdf_uri
        if not vocab.findall(DCTERMS + "isVersionOf"):
            ET.SubElement(vocab, DCTERMS + "isVersionOf", attrib={RDF + "resource": ns_uri})
        if not vocab.findall(VANN + "preferredNamespacePrefix"):
            se2a = ET.SubElement(vocab, VANN + "preferredNamespacePrefix")
            se2a.text = vocab_properties['preferredNamespacePrefix']
        if not vocab.findall(VANN + "preferredNamespaceUri"):
            se2b = ET.SubElement(vocab, VANN + "preferredNamespaceUri")
            se2b.text = vocab_properties['preferredNamespaceUri']
        if not vocab.findall(DCTERMS + "hasFormat"):
            # No formats recorded at all: describe both renditions.
            _add_format(vocab, html_uri, 'text/html', 'HTML')
            _add_format(vocab, rdf_uri, 'application/rdf+xml', 'RDF')
        else:
            # Check which renditions are already described, add missing ones.
            available_types = []
            for format_el in vocab.findall(DCTERMS + "hasFormat"):
                for type_tag in format_el.findall(".//" + RDF + "value"):
                    # BUG FIX: the original left ftype unbound (NameError) or
                    # stale from a previous iteration when a value element had
                    # neither an rdf:resource attribute nor text.
                    ftype = type_tag.attrib.get(RDF + "resource") or type_tag.text
                    if ftype and 'html' in ftype.lower():
                        available_types.append('html')
                    elif ftype and 'rdf' in ftype.lower():
                        available_types.append('rdf')
            if 'html' not in available_types:
                _add_format(vocab, html_uri, 'text/html', 'HTML')
            if 'rdf' not in available_types:
                _add_format(vocab, rdf_uri, 'application/rdf+xml', 'RDF')
    else:
        # No owl:Ontology element at all: create one with the full set of
        # properties and both format descriptions.
        vocab = ET.SubElement(tree_root, OWL + "Ontology", attrib={RDF + "about": ns_uri})
        se0 = ET.SubElement(vocab, DC + "identifier")
        se0.text = rdf_uri
        ET.SubElement(vocab, DCTERMS + "isVersionOf", attrib={RDF + "resource": ns_uri})
        se2a = ET.SubElement(vocab, VANN + "preferredNamespacePrefix")
        se2a.text = vocab_properties['preferredNamespacePrefix']
        se2b = ET.SubElement(vocab, VANN + "preferredNamespaceUri")
        se2b.text = vocab_properties['preferredNamespaceUri']
        _add_format(vocab, html_uri, 'text/html', 'HTML')
        _add_format(vocab, rdf_uri, 'application/rdf+xml', 'RDF')

    # Stamp rdfs:isDefinedBy on every class / property term that lacks it.
    for term in tree_root.getiterator():
        if term.tag.lower().strip() in def_tags:
            defby = term.find(RDFS + "isDefinedBy")
            # BUG FIX: 'is None', not truthiness -- a childless isDefinedBy
            # element is falsy and would have been duplicated.
            if defby is None:
                ET.SubElement(term, RDFS + "isDefinedBy", attrib={RDF + "resource": ns_uri})

    # Move the ontology description to the top of the document.
    tree_root.remove(vocab)
    tree_root.insert(0, vocab)
    tree.write(newrdf_vocab_properties['path'])
    return (newrdf_vocab_properties, html_vocab_properties)
# Named graph: http://example.org/foaf/bobFoaf @prefix foaf: <http://xmlns.com/foaf/0.1/> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . _:z foaf:mbox <mailto:[email protected]> . _:z rdfs:seeAlso <http://example.org/foaf/bobFoaf> . _:z foaf:nick "Robert" . <http://example.org/foaf/bobFoaf> rdf:type foaf:PersonalProfileDocument . """ graph = ConjunctiveGraph(plugin.get('IOMemory', Store)()) graph.parse(StringIO(text), format="n3") print graph.serialize(format='xml') test_query = """ PREFIX data: <http://example.org/foaf/> PREFIX foaf: <http://xmlns.com/foaf/0.1/> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT ?mbox ?nick ?ppd WHERE { GRAPH data:aliceFoaf { ?alice foaf:mbox <mailto:[email protected]> ; foaf:knows ?whom . ?whom foaf:mbox ?mbox ;
class sparql_funcs():
    """SPARQL helper queries over the Open Correspondence letter data."""

    # Exact-match aliases used by tidy_location() to canonicalise the place
    # names found in letter headers (replaces the original if/elif chain).
    _LOCATION_ALIASES = {
        'Office Of "household Words,': "Household Words",
        '"household Words" Office': "Household Words",
        '"household Words"': "Household Words",
        'H. W. Office': "Household Words",
        '"household Words,': "Household Words",
        '"all The Year Round" Office': "All The Year Round",
        'Office Of "all The Year Round,': "All The Year Round",
        "Gad's Hill Place": "Gads Hill",
        "Gad's Hill": "Gads Hill",
        "Gad's Hill Place, Higham": "Gads Hill",
        "Tavistock House, Tavistock Square": "Tavistock House",
        "London, Tavistock House": "Tavistock House",
        "Tavistock House, London": "Tavistock House",
    }

    def __init__(self):
        self.g = Graph('IOMemory')
        # BUG FIX: find_correspondents() reads self.endpoint, but the
        # previous version left this assignment commented out, so that
        # method always raised AttributeError.
        self.endpoint = "http://www.opencorrespondence.org/data/endpoint/rdf"
        #self.g.bind('geo', geo)

    def find_places(self):
        '''Return the distinct locations mentioned in the headers of the
        letters -- the places from which Dickens wrote.

        TODO: parse the letter bodies to pick up places mentioned in them.
        '''
        row = set()
        o = OFS()
        for b in o.list_buckets():
            endpoint = o.get_stream(b, "endpoint")
            self.g.parse(endpoint)
        for s, _, n in self.g.triples((None, dublin_core['title'], None)):
            loc_key = urllib.unquote(n.replace("http://www.opencorrespondence.org/place/resource/", "").replace("/rdf", ""))
            row.add(self.tidy_location(loc_key))
        return row

    def tidy_location(self, location):
        '''Canonicalise place names that refer to the same place.

        TODO: probably needs some language processing to make this scalable.
        '''
        try:
            return self._LOCATION_ALIASES[location]
        except KeyError:
            # Fallback: strip the 'U.s' abbreviation, then drop all dots.
            if "U.s." in location:
                location = str(location).replace("U.s", "")
            return str(location).replace(".", "")

    def find_correspondents(self):
        '''Return the distinct correspondents mentioned in the letters.'''
        row = set()
        self.g.parse(self.endpoint)
        for s, _, n in self.g.triples((None, letter['correspondent'], None)):
            loc_key = urllib.unquote(n.replace("http://www.opencorrespondence.org/correspondent/resource/", "").replace("/rdf", ""))
            row.add(loc_key)
        return row

    def get_abstract(self, resource_id):
        '''Return the English-language dbpedia abstract for resource_id,
        or None when dbpedia has no such abstract.'''
        # BUG FIX: the original wrote '...'.resource_id -- attribute access
        # on a string literal (AttributeError) -- instead of concatenation.
        self.g.parse('http://dbpedia.org/resource/' + resource_id)
        q = '''
            SELECT * WHERE {
                ?x dbpedia:abstract ?abstract .
                FILTER (lang(?abstract) = 'en')
            }
        '''
        for row in self.g.query(q, initNs=dict(dbpedia=Namespace("http://dbpedia.org/ontology/")), initBindings={}):
            return row[1]

    def query_dates(self, author):
        '''Return a {year: count} dict of letters to the given correspondent.'''
        # NOTE(review): author is spliced straight into the query text --
        # SPARQL-injection risk if it can ever come from untrusted input.
        q = ('SELECT ?date '
             'FROM <http://localhost:5000/data/endpoint/rdf> '
             "WHERE { ?r dc:subject '" + author + "' . ?r dc:date ?date. }")
        dates = []
        for row in self.g.query(q, initNs=dict(letter=Namespace("http://www.opencorrespondence.org/schema#"), dc=Namespace("http://purl.org/dc/elements/1.1/")), initBindings={}):
            date = str(row[0]).split('-')
            # Keep only rows whose leading token looks like a year.
            if date[0][1:].isdigit():
                dates.append(date[0])
        print(dates)
        dic = {}
        for dt in dates:
            dic[dt] = dates.count(dt)
        return dic