def __init__(self, path=None):
    """Initialise the shared djubby configuration, loading it on first use.

    The instance dictionary is replaced by the class-wide shared state, so
    the N3 file is only read while the shared ``data`` is still ``None``;
    later instantiations return immediately and ignore ``path``.

    path -- location of the N3 configuration file (needed the first time).

    Raises ValueError if no path is given on first use, if the file does not
    exist, if it cannot be loaded as N3, or if the default graph or the
    SPARQL endpoint cannot be read from it.
    """
    self.__dict__ = self.__shared_state
    if self.data is not None:
        return

    if path is None:
        raise ValueError("djubby's configuration MUST be initialized a first time, read http://code.google.com/p/djubby/wiki/GettingStarted")

    self.path = os.path.abspath(path)
    logging.debug("Reading djubby's configuration from %s..." % self.path)
    if not os.path.exists(self.path):
        raise ValueError("Not found a proper file at '%s' with a configuration for djubby. Please, provide a right path" % self.path)

    data = ConjunctiveGraph()
    data.bind("conf", ns.config)
    try:
        data.load(path, format='n3')
    except Exception:
        raise ValueError("Not found a proper N3 file at '%s' with a configuration for djubby. Please, provide a valid N3 file" % self.path)

    # The graph is stored before the lookups below are made.
    self.data = data
    try:
        self.graph = self.get_value("sparqlDefaultGraph")
        self.endpoint = self.get_value("sparqlEndpoint")
    except Exception:
        # Forget the unusable graph: otherwise the shared state would look
        # initialised and the next instantiation would skip loading.
        self.data = None
        raise ValueError("Not found the graph nor the endpoint that it's supposed djubby have to query. Please, provide a right configuration")

    logging.info("Using <%s> as default graph to query the endpoint <%s>" % (self.graph, self.endpoint))
    # NOTE(review): presumably redundant with "self.data = data" above, since
    # the instance dictionary is the shared state - confirm before removing.
    self.__class__.__dict__['_Configuration__shared_state']["data"] = data #FIXME
def make_rdf_graph(movies):
    """Build an RDF graph of movies, their directors and their casts.

    movies -- iterable of dicts with the keys 'id', 'title', 'director'
              (a dict with 'id' and 'name') and 'actors' (a list of dicts
              with 'id' and 'name').

    Returns the populated ConjunctiveGraph.
    """
    graph = ConjunctiveGraph()
    graph.bind('fb', FB)
    graph.bind('dc', DC)

    for movie in movies:
        # Make a movie node
        movie_node = IVA_MOVIE[movie['id']]
        graph.add((movie_node, DC['title'], Literal(movie['title'])))

        # Make the director node, give it a name and link it to the movie
        director = movie['director']
        director_node = IVA_PERSON[director['id']]
        graph.add((movie_node, FB['film.film.directed_by'], director_node))
        graph.add((director_node, DC['title'], Literal(director['name'])))

        for actor in movie['actors']:
            # The performance node is a blank node -- it has no URI
            performance = BNode()

            # The performance is connected to the actor and the movie
            actor_node = IVA_PERSON[actor['id']]
            graph.add((actor_node, DC['title'], Literal(actor['name'])))
            graph.add((performance, FB['film.performance.actor'], actor_node))
            # If you had the name of the role, you could also add it to the
            # performance node, e.g.
            # graph.add((performance,FB['film.performance.role'],Literal('Carrie Bradshaw')))
            graph.add((movie_node, FB['film.film.performances'], performance))

    return graph
def to_rdf(self, format="settings"):
    """Merge all subgraphs of the RdfSerializer store and serialise them.

    format -- serialisation format handed to the graph serialiser; the
              default "settings" selects settings.RDF_SERIALIZATION.
    """
    merged = Graph()
    for prefix, namespace in self.NAMESPACES.iteritems():
        merged.bind(prefix, namespace)

    for subgraph in self.subgraphs:
        merged += subgraph

    chosen_format = format
    if chosen_format == "settings":
        chosen_format = settings.RDF_SERIALIZATION
    return merged.serialize(format=chosen_format)
def toRDF(self):
    """
    Build the RDF (SIOC) description of this message

    A sioc:Post node for the message and a foaf:Document node for its
    '.rdf' document are always added. The remaining properties (id, link,
    container, creator, title, date, thread links and content) are added
    best-effort: any exception while collecting them is reported on
    standard output and the properties added so far are kept.
    """
    # NOTE(review): no return statement is visible for this graph in the
    # reviewed chunk - confirm whether it is returned further down.

    #rdf graph
    store = ConjunctiveGraph()

    #namespaces
    store.bind('sioc', SIOC)
    store.bind('foaf', FOAF)
    store.bind('rdfs', RDFS)
    store.bind('dc', DC)
    store.bind('dct', DCT)

    #message node
    message = URIRef(self.getUri())
    store.add((message, RDF.type, SIOC["Post"]))

    #document node
    doc = URIRef(self.getUri() + '.rdf')
    store.add((doc, RDF.type, FOAF["Document"]))
    store.add((doc, FOAF["primaryTopic"], message))

    try:
        store.add((message, SIOC['id'], Literal(self.getSwamlId())))
        store.add((message, SIOC['link'], URIRef(self.getXhtmlUrl())))
        store.add((message, SIOC['has_container'], URIRef(self.config.get('base') + 'forum')))
        store.add((message, SIOC["has_creator"], URIRef(self.getSender().getUri())))
        store.add((message, DC['title'], Literal(self.getSubject())))
        store.add((message, DCT['created'], Literal(self.getDate(), datatype=XSD[u'dateTime'])))

        parent = self.getParent()
        if parent is not None:
            store.add((message, SIOC['reply_of'], URIRef(parent)))

        # iterating an empty collection adds nothing, no length check needed
        for child in self.childs:
            store.add((message, SIOC['has_reply'], URIRef(child)))

        previous = self.getPreviousByDate()
        if previous is not None:
            store.add((message, SIOC['previous_by_date'], URIRef(previous)))

        following = self.getNextByDate()
        if following is not None:
            store.add((message, SIOC['next_by_date'], URIRef(following)))

        store.add((message, SIOC['content'], Literal(self.getBody())))
    except Exception as detail:
        print('Error proccesing message ' + str(self.getId()) + ': ' + str(detail))
def parse(self, source, graph):
    """Parse the N3 document in ``source`` into the store behind ``graph``.

    source -- input source; its public or system id becomes the base URI
              and its byte stream supplies the data.
    graph  -- target graph; its store must be context aware and formula
              aware.
    """
    # we're currently being handed a Graph, not a ConjunctiveGraph
    store = graph.store
    assert store.context_aware # is this implied by formula_aware
    assert store.formula_aware

    target = ConjunctiveGraph(store=store)
    target.default_context = graph # TODO: CG __init__ should have a default_context arg

    # TODO: update N3Processor so that it can use conj_graph as the sink
    n3_sink = Sink(target)
    if False:
        # disabled switch, kept as in the original
        n3_sink.quantify = lambda *args: True
        n3_sink.flatten = lambda *args: True

    document_id = source.getPublicId() or source.getSystemId() or ""
    base = graph.absolutize(document_id)

    # pass in "nowhere" so we can set data instead
    processor = N3Processor("nowhere", n3_sink, baseURI=base)
    processor.userkeys = True # bah
    processor.data = source.getByteStream().read() # TODO getCharacterStream?
    processor.parse()

    # carry the prefixes declared in the document over to the graph
    for prefix, namespace in processor.bindings.items():
        target.bind(prefix, namespace)
def toRDF(self):
    """
    Build the RDF (SIOC) description of this message

    A sioc:Post node for the message and a foaf:Document node for its
    '.rdf' document are always added. The remaining properties (id, link,
    container, creator, title, date, thread links and content) are added
    best-effort: any exception while collecting them is reported on
    standard output and the properties added so far are kept.
    """
    # NOTE(review): no return statement is visible for this graph in the
    # reviewed chunk - confirm whether it is returned further down.

    #rdf graph
    store = ConjunctiveGraph()

    #namespaces
    store.bind('sioc', SIOC)
    store.bind('foaf', FOAF)
    store.bind('rdfs', RDFS)
    store.bind('dc', DC)
    store.bind('dct', DCT)

    #message node
    message = URIRef(self.getUri())
    store.add((message, RDF.type, SIOC["Post"]))

    #document node
    doc = URIRef(self.getUri() + '.rdf')
    store.add((doc, RDF.type, FOAF["Document"]))
    store.add((doc, FOAF["primaryTopic"], message))

    try:
        store.add((message, SIOC['id'], Literal(self.getSwamlId())))
        store.add((message, SIOC['link'], URIRef(self.getXhtmlUrl())))
        store.add((message, SIOC['has_container'], URIRef(self.config.get('base') + 'forum')))
        store.add((message, SIOC["has_creator"], URIRef(self.getSender().getUri())))
        store.add((message, DC['title'], Literal(self.getSubject())))
        store.add((message, DCT['created'], Literal(self.getDate(), datatype=XSD[u'dateTime'])))

        parent = self.getParent()
        if parent is not None:
            store.add((message, SIOC['reply_of'], URIRef(parent)))

        # iterating an empty collection adds nothing, no length check needed
        for child in self.childs:
            store.add((message, SIOC['has_reply'], URIRef(child)))

        previous = self.getPreviousByDate()
        if previous is not None:
            store.add((message, SIOC['previous_by_date'], URIRef(previous)))

        following = self.getNextByDate()
        if following is not None:
            store.add((message, SIOC['next_by_date'], URIRef(following)))

        store.add((message, SIOC['content'], Literal(self.getBody())))
    except Exception as detail:
        print('Error proccesing message ' + str(self.getId()) + ': ' + str(detail))
def message_board_to_sioc(dbfile):
    """Export the message-board SQLite database as a SIOC graph.

    dbfile -- path of the SQLite database; the tables messages, users,
              subjects and message_subjects are read.

    Returns a ConjunctiveGraph with one node per message, user and subject
    plus the message-to-subject links. The database connection is closed
    even when a query fails.
    """
    sg = ConjunctiveGraph()
    sg.bind('foaf', FOAF)
    sg.bind('sioc', SIOC)
    sg.bind('dc', DC)

    conn = sqlite3.connect(dbfile)
    try:
        cur = conn.cursor()

        # Get all the messages and add them to the graph
        cur.execute('SELECT id,title,content,user FROM messages')
        for message_id, title, content, user in cur.fetchall():
            mnode = MB['messages/%d' % message_id]
            sg.add((mnode, RDF.type, SIOC['Post']))
            sg.add((mnode, DC['title'], Literal(title)))
            sg.add((mnode, SIOC['content'], Literal(content)))
            sg.add((mnode, SIOC['has_creator'], MB['users/%s' % user]))

        # Get all the users and add them to the graph
        cur.execute('SELECT id,name,email FROM users')
        for user_id, name, email in cur.fetchall():
            unode = MB['users/%d' % user_id]
            # the type belongs on the user node, not on the last message node
            sg.add((unode, RDF.type, SIOC['User']))
            sg.add((unode, FOAF['name'], Literal(name)))
            sg.add((unode, FOAF['email'], Literal(email)))

        # Get subjects
        cur.execute('SELECT id,description FROM subjects')
        for subject_id, description in cur.fetchall():
            snode = MB['subjects/%d' % subject_id]
            # likewise, type the subject node itself
            sg.add((snode, RDF.type, DCTERMS['subject']))
            sg.add((snode, RDFS['label'], Literal(description)))

        # Link subject to messages
        cur.execute('SELECT message_id,subject_id FROM message_subjects')
        for mid, sid in cur.fetchall():
            # format the local name before the namespace lookup
            sg.add((MB['messages/%s' % mid], SIOC['topic'], MB['subjects/%s' % sid]))
    finally:
        conn.close()

    return sg
def message_board_to_sioc(dbfile):
    """Export the message-board SQLite database as a SIOC graph.

    dbfile -- path of the SQLite database; the tables messages, users,
              subjects and message_subjects are read.

    Returns a ConjunctiveGraph with one node per message, user and subject
    plus the message-to-subject links. The database connection is closed
    even when a query fails.
    """
    sg = ConjunctiveGraph()
    sg.bind('foaf', FOAF)
    sg.bind('sioc', SIOC)
    sg.bind('dc', DC)

    conn = sqlite3.connect(dbfile)
    try:
        cur = conn.cursor()

        # Get all the messages and add them to the graph
        cur.execute('SELECT id,title,content,user FROM messages')
        for message_id, title, content, user in cur.fetchall():
            mnode = MB['messages/%d' % message_id]
            sg.add((mnode, RDF.type, SIOC['Post']))
            sg.add((mnode, DC['title'], Literal(title)))
            sg.add((mnode, SIOC['content'], Literal(content)))
            sg.add((mnode, SIOC['has_creator'], MB['users/%s' % user]))

        # Get all the users and add them to the graph
        cur.execute('SELECT id,name,email FROM users')
        for user_id, name, email in cur.fetchall():
            unode = MB['users/%d' % user_id]
            # the type belongs on the user node, not on the last message node
            sg.add((unode, RDF.type, SIOC['User']))
            sg.add((unode, FOAF['name'], Literal(name)))
            sg.add((unode, FOAF['email'], Literal(email)))

        # Get subjects
        cur.execute('SELECT id,description FROM subjects')
        for subject_id, description in cur.fetchall():
            snode = MB['subjects/%d' % subject_id]
            # likewise, type the subject node itself
            sg.add((snode, RDF.type, DCTERMS['subject']))
            sg.add((snode, RDFS['label'], Literal(description)))

        # Link subject to messages
        cur.execute('SELECT message_id,subject_id FROM message_subjects')
        for mid, sid in cur.fetchall():
            # format the local name before the namespace lookup
            sg.add((MB['messages/%s' % mid], SIOC['topic'], MB['subjects/%s' % sid]))
    finally:
        conn.close()

    return sg
import urllib
from simplejson import loads
from csv import reader

JB = Namespace("http://semprog.com/schemas/jobboard#")
DC = Namespace("http://purl.org/dc/elements/1.1/")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
COMPANY = Namespace("http://purl.org/rss/1.0/modules/company/")

# Connection to Sesame
con = connection('http://freerisk.org:8280/openrdf-sesame/')
con.use_repository('joblistings')
con.addnamespace('company', str(COMPANY))

cg = ConjunctiveGraph()
cg.bind('dc', DC)
cg.bind('jobboard', JB)

# Find companies with ticker symbols
res = con.query('select ?id ?ticker where {?id company:symbol ?ticker .}')

# Loop over the results
for row in res:
    company = URIRef(row['id']['value'])
    ticker = row['ticker']['value']

    url = 'http://ichart.finance.yahoo.com/table.csv?s=%s&a=00&b=28&c=2008&d=00&e=28&f=2009&g=m&ignore=.csv' % ticker
    # list() instead of "[row for row in ...]": in Python 2 the comprehension
    # variable leaks and would overwrite the query result held in "row".
    rows = list(reader(urllib.urlopen(url)))

    # column 6 of the first data line (line 0 is the CSV header)
    current = float(rows[1][6])
# Send log records of this module's logger to a stream handler.
_hdlr = logging.StreamHandler()
_hdlr.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
_logger.addHandler(_hdlr)

from rdflib.Graph import ConjunctiveGraph as Graph
from rdflib import plugin
from rdflib.store import Store
from rdflib import Namespace
from rdflib import Literal
from rdflib import URIRef
from rdflib.sparql.bison import Parse

store = Graph()

# with some care this could be made less redundant
# "dc" is bound exactly once: an earlier extra binding to the malformed
# "http://http://purl.org/dc/elements/1.1/" claimed the prefix before the
# real Dublin Core namespace could use it.
store.bind("foaf", "http://xmlns.com/foaf/0.1/")
store.bind("dc", 'http://purl.org/dc/elements/1.1/')
store.bind("rdf", 'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
store.bind("rdfs", 'http://www.w3.org/2000/01/rdf-schema#')
store.bind("owl", 'http://www.w3.org/2002/07/owl#')
store.bind("vs", 'http://www.w3.org/2003/06/sw-vocab-status/ns#')

# Create a namespace object for the Friend of a friend namespace.
foaf = Namespace("http://xmlns.com/foaf/0.1/")
dc = Namespace('http://purl.org/dc/elements/1.1/')
rdf = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
rdfs = Namespace('http://www.w3.org/2000/01/rdf-schema#')
owl = Namespace('http://www.w3.org/2002/07/owl#')
vs = Namespace('http://www.w3.org/2003/06/sw-vocab-status/ns#')
class rdf_transform:
    '''
    Builds RDF descriptions of the Dickens letters and related resources.

    All create_* methods add triples to the one graph held in self.g and
    return that whole graph serialised as pretty RDF/XML, so triples
    accumulate across calls on the same instance.

    NOTE(review): several loop and try/except boundaries in this class were
    re-derived from context - confirm them against the deployed version.
    '''

    # Short titles as quoted in the letters, mapped to the full work title.
    _WORK_ALIASES = {
        "Copperfield": "David Copperfield",
        "Nickleby": "Nicholas Nickleby",
        "Edwin Drood": "The Mystery of Edwin Drood",
        "Dombey": "Dombey and Son",
        "Tale of Two Cities": "A Tale of Two Cities",
        "Christmas Carol": "A Christmas Carol",
    }

    # Quoted strings treated as magazines by create_rdf_letter.
    _LETTER_MAGAZINES = ("ALL THE YEAR ROUND", "HOUSEHOLD WORDS", "Household Words")

    # Quoted strings treated as periodicals by create_rdf_end.
    #TODO: Normalise the text to reduce code repetition
    _ENDPOINT_PERIODICALS = frozenset(['All The Year Round', 'Household Words', 'The Daily News'])

    def __init__(self):
        ''' creates the in-memory graph and binds the namespace prefixes '''
        self.g = Graph('IOMemory')
        self.g.bind('dc', dublin_core)
        self.g.bind('foaf', FOAF)
        self.g.bind('time-entry', owl_time)
        self.g.bind('letter', letter_ns)
        self.g.bind('owl', owl)
        self.g.bind('ex', exam)
        self.g.bind('geo', geo)
        self.g.bind('base', base_uri)

    def _serialize(self):
        ''' returns the whole graph as pretty RDF/XML '''
        return self.g.serialize(format="pretty-xml", max_depth=3)

    def _resource(self, kind, name):
        ''' returns the URIRef "<base_uri><kind>/resource/<url-quoted name>/rdf" '''
        return URIRef(base_uri + '%s/resource/%s/rdf' % (kind, urllib.quote(name)))

    def create_rdf_letter (self, letters):
        ''' creates an rdf representation of letter used to load into the triple store

        letters -- iterable of letter records; the attributes type,
                   correspondent, id, letter_date, letter_place, letter_text
                   and salutation are read from each record.
        '''
        for l in letters:
            correspondence = base_uri + "letters/resource/" + l.type + '/' + urllib.quote(l.correspondent) + '/' + str(l.id) + '/rdf'
            self.add_author(correspondence, "Charles Dickens")
            self.add_subject(correspondence, "letter")
            self.add_time(correspondence, str(l.letter_date)+'T00:00:00')
            self.add_correspondent(correspondence, l.correspondent)
            #self.add_place(correspondence, parse_text.find_geographical(l.letter_text))
            place = ''
            try:
                place = str(l.letter_place)
            #unicode errors are text related
            except UnicodeError:
                pass
            # compare by value: "is not ''" tested object identity
            if place != '':
                self.add_place(correspondence, place)
            self.add_letter_text(correspondence, l.letter_text)
            self.add_salutation(correspondence, l.correspondent, l.salutation)

            #this section will parse for proper names in due course
            #commented out whilst code is being ported
            for quote in parse_text.parse_balanced_quotes(l.letter_text):
                # isupper must be called: the bare method object is always
                # true, which let every quote without "!" through
                if quote[0:1].isupper() and "!" not in quote:
                    if quote in self._LETTER_MAGAZINES:
                        self.add_magazine(correspondence, parse_text.stripPunc(quote))
                    else:
                        self.add_text(correspondence, parse_text.stripPunc(quote))

        return self._serialize()

    def create_rdf_end (self):
        ''' function to create an endpoint in rdf/xml '''
        works = dbase.get_books()

        for url, text in sorted(dbase.get_endpoint_rdf().items()):
            # reset per letter, so a unicode failure cannot leave values of
            # the previous letter attached to this one
            correspondence = None
            place = None
            letter_text = None
            try:
                correspondent = str(text[1])
                correspondence = base_uri + "letters/resource/dickens/" + urllib.quote(correspondent) + '/' + str(url) + '/rdf'
                self.add_author(correspondence, "Charles Dickens")
                self.add_subject(correspondence, "letter")
                self.add_subject(correspondence, "Charles Dickens")
                self.add_subject(correspondence, parse_text.camel_case(correspondent))
                self.add_time(correspondence, str(text[3])+'T00:00:00')
                self.add_correspondent(correspondence, correspondent)
                self.add_salutation(correspondence, urllib.quote(correspondent), str(text[4]))
                place = str(text[5])
                letter_text = str(text[2])
            #unicode errors are text related
            except UnicodeError:
                pass

            if correspondence is None:
                # the letter URI itself could not be built: nothing to attach to
                continue
            if place is not None:
                self.add_place(correspondence, place)
            if letter_text is not None:
                self.add_letter_text(correspondence, letter_text)

            #this section will parse for proper names in due course
            #commented out whilst code is being ported
            for quote in parse_text.parse_balanced_quotes(text[2]):
                work = parse_text.stripPunc(quote)
                if quote in self._ENDPOINT_PERIODICALS:
                    self.add_magazine(correspondence, quote)
                if work in works:
                    # expand the short titles used in the letters
                    self.add_text(correspondence, self._WORK_ALIASES.get(work, work))

        return self._serialize()

    def create_correspondent(self, corr, letter_items):
        ''' function to describe a correspondent in rdf

        corr         -- name of the correspondent
        letter_items -- iterable of pairs; the first element of each pair is
                        added as a salutation (foaf:nick)
        '''
        u_corr = unicode(corr)
        correspondence = base_uri + "correspondent/resource/" + urllib.quote(corr)
        self.add_subject(correspondence, "correspondent")
        #self.add_correspondent(correspondence, corr)
        for url, _text in letter_items:
            # "and", not "or": the previous test was true for every value
            if url is not None and url != '':
                self.add_salutation(correspondence, corr, str(url))

        #need rules to define relationships - family, authors
        if u_corr == "Miss Hogarth":
            self.add_subject(correspondence, "daughter")
            self.add_daughter(correspondence, "Charles Dickens")
            self.add_sameas(correspondence, "http://dbpedia.org/page/Georgina_Hogarth")

        return self._serialize()

    def create_publication(self, title, type):
        ''' function to describe a publication in rdf

        title -- title handed to dbase.get_book_rdf; replaced by the key of
                 the last record returned, if any
        type  -- resource kind used in the URI and as a subject, e.g. "book"
        '''
        start = end = abstract = uri_str = source = ''

        # the values of the last record (in key order) are the ones used
        for u, book in sorted(dbase.get_book_rdf(title).items()):
            title = u
            start = book[0]
            end = book[1]
            abstract = book[2]
            uri_str = book[3]
            source = book[4]

        correspondence = base_uri + type + "/resource/" + title.strip().replace(" ", "_")
        self.add_subject(correspondence, type)
        self.add_subject(correspondence, "Charles Dickens")
        self.add_author(correspondence, "Charles Dickens")
        self.add_time(correspondence, start)
        self.add_time(correspondence, end)
        self.add_title(correspondence, title)
        self.add_abstract(correspondence, abstract)

        uri = u"http://dbpedia.org/page/" + uri_str
        self.add_sameas(correspondence, uri)

        if type == "book":
            source_uri = "http://gutenberg.org/ebooks/" + source
            self.add_sameas(correspondence, source_uri)

        return self._serialize()

    def create_place (self, placeobj):
        ''' function to describe a place in rdf

        placeobj -- iterable of location records (placeid, latitude,
                    longitude, source); the last record is the one used
        '''
        lat, lng, place_name, source = '', '', '', ''
        for location in placeobj:
            place_name = location.placeid
            lat = location.latitude
            lng = location.longitude
            source = location.source

        correspondence = base_uri + "place/resource/" + urllib.quote(place_name) + "/rdf"
        self.add_latitude(correspondence, lat)
        self.add_longitude(correspondence, lng)
        self.add_place_name(correspondence, place_name)
        #self.add_description(correspondence, place_abstract)
        self.add_sameas(correspondence, source)

        return self._serialize()

    def create_author (self, data):
        ''' function to return an author graph in rdf

        data is accepted for interface compatibility and not used.
        '''
        author = u"Charles Dickens"
        born = u"1812-02-07"
        died = u"1870-06-09"
        abstract = (
            u"Charles John Huffam Dickens, pen-name 'Boz', was the most popular English novelist of the Victorian era, "
            u"and one of the most popular of all time, responsible for some of English literature's most iconic characters. "
            u"Many of his novels, with their recurrent theme of social reform, first appeared in periodicals and magazines "
            u"in serialised form, a popular format for fiction at the time. "
            u"Unlike other authors who completed entire novels before serial production began, Dickens often wrote them "
            u"while they were being serialized, creating them in the order in which they were meant to appear. "
            u"The practice lent his stories a particular rhythm, punctuated by one 'cliffhanger' after another to keep "
            u"the public looking forward to the next installment. "
            u"The continuing popularity of his novels and short stories is such that they have never gone out of print. "
            u"His work has been praised for its mastery of prose and unique personalities by writers such as "
            u"George Gissing and G. K. Chesterton, though the same characteristics prompted others, such as Henry James "
            u"and Virginia Woolf, to criticize him for sentimentality and implausibility."
        )
        author_url = u"http://en.wikipedia.org/wiki/Charles_Dickens"

        correspondence = base_uri + "author/resource/" + author
        self.add_subject(correspondence, "Charles Dickens")
        self.add_subject(correspondence, "author")
        self.add_nick(correspondence, "Boz")
        self.add_time(correspondence, born)
        self.add_time(correspondence, died)
        self.add_abstract(correspondence, abstract)
        self.add_sameas(correspondence, author_url)

        return self._serialize()

    def add_author(self, correspondence, name):
        ''' function to add author to graph; returns the author URIRef '''
        lauthor = self._resource('author', name)
        self.g.add((correspondence, dublin_core['creator'], lauthor))
        #self.g.add((correspondence, dublin_core['creator'], Literal(name)))
        return lauthor

    def add_salutation(self, correspondence, author, name):
        ''' function to add salutation to graph; returns the URIRef of the
        correspondent resource built from author '''
        person = self._resource('correspondent', author)
        #self.g.add((person, RDF.type, FOAF['nick']))
        self.g.add((correspondence, FOAF['nick'], Literal(name)))
        return person

    def add_correspondent(self, correspondence, name):
        ''' function to add correspondent to graph; returns the correspondent URIRef '''
        person = self._resource('correspondent', name)
        self.g.add((correspondence, letter_ns["correspondent"], person))
        #self.g.add((person, Letter, Literal(name)))
        return person

    def add_magazine(self, correspondence, name):
        ''' function to add magazine to graph; returns the magazine URIRef '''
        magazine = self._resource('magazine', name)
        self.g.add((correspondence, letter_ns['textReferred'], magazine))
        #self.g.add((person, Letter, Literal(name)))
        return magazine

    def add_text (self, correspondence, textname):
        ''' function to add referred text to the graph'''
        # newlines and spaces become underscores; the name is not url-quoted
        textid = base_uri + "book/resource/"+textname.replace("\n", "_").replace(" ", "_")+ "/rdf"
        return self.g.add((correspondence, letter_ns['textReferred'], URIRef(textid)))

    def add_author_text (self, correspondence, textname):
        ''' function to add author referred text to the graph'''
        # returns the result of the add like add_text does; this method used
        # to end by returning an undefined name
        return self.g.add((correspondence, letter_ns['textAuthorReferred'], Literal(textname)))

    def add_subject (self, correspondence, subject):
        ''' function to add a dc:subject literal '''
        return self.g.add((correspondence, dublin_core['subject'], Literal(subject)))

    def add_sameas (self, correspondence, link):
        ''' function to add an owl:sameAs link '''
        return self.g.add((correspondence, owl['sameAs'], URIRef(link)))

    def add_time(self, correspondence, time):
        ''' function to add time '''
        return self.g.add((correspondence, dublin_core['date'], Literal(str(time))))

    def add_title (self, correspondence, title):
        ''' function to add a dc:title literal '''
        return self.g.add((correspondence, dublin_core['title'], Literal(title)))

    def add_nick (self, correspondence, nick):
        ''' function to add a foaf:nick literal '''
        return self.g.add((correspondence, FOAF['nick'], Literal(nick)))

    def add_place (self, correspondence, place):
        ''' function to link the place resource built from place '''
        # NOTE(review): the place is linked with dc:title, the same predicate
        # add_title uses - confirm this is the intended predicate.
        return self.g.add((correspondence, dublin_core['title'], self._resource('place', place)))

    def add_daughter (self, correspondence, author):
        ''' function to add the daughter relation to the author resource '''
        # quoted like add_author, so both refer to the same author URI
        return self.g.add((correspondence, exam['daughter'], self._resource('author', author)))

    def add_letter_text (self, correspondence, letter_text):
        ''' function to add the text of a letter '''
        return self.g.add((correspondence, letter_ns['Text'], Literal(letter_text)))

    def add_longitude (self, correspondence, long):
        ''' function to add a geo:long literal '''
        return self.g.add((correspondence, geo['long'], Literal(long)))

    def add_latitude (self, correspondence, lat):
        ''' function to add a geo:lat literal '''
        return self.g.add((correspondence, geo['lat'], Literal(lat)))

    def add_description (self, correspondence, abstract):
        ''' function to add a geo:desc literal '''
        return self.g.add((correspondence, geo['desc'], Literal(abstract)))

    def add_place_name (self, correspondence, name):
        ''' function to add a geo:name literal '''
        return self.g.add((correspondence, geo['name'], Literal(name)))

    def add_abstract (self, correspondence, letters):
        ''' function to add an abstract (stored under letter:text) '''
        return self.g.add((correspondence, letter_ns['text'], Literal(letters)))

    def add_open (self, correspondence, letters):
        ''' function to add the opening of a letter '''
        return self.g.add((correspondence, letter_ns['open'], Literal(letters)))

    def add_close (self, correspondence, letters):
        ''' function to add the close of a letter '''
        return self.g.add((correspondence, letter_ns['close'], Literal(letters)))
import urllib
from simplejson import loads
from csv import reader

JB = Namespace("http://semprog.com/schemas/jobboard#")
DC = Namespace("http://purl.org/dc/elements/1.1/")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
COMPANY = Namespace("http://purl.org/rss/1.0/modules/company/")

# Connection to Sesame
con = connection('http://freerisk.org:8280/openrdf-sesame/')
con.use_repository('joblistings')
con.addnamespace('company', str(COMPANY))

cg = ConjunctiveGraph()
cg.bind('dc', DC)
cg.bind('jobboard', JB)

# Find companies with ticker symbols
res = con.query('select ?id ?ticker where {?id company:symbol ?ticker .}')

# Loop over the results
for row in res:
    company = URIRef(row['id']['value'])
    ticker = row['ticker']['value']

    url = 'http://ichart.finance.yahoo.com/table.csv?s=%s&a=00&b=28&c=2008&d=00&e=28&f=2009&g=m&ignore=.csv' % ticker
    # list() instead of "[row for row in ...]": in Python 2 the comprehension
    # variable leaks and would overwrite the query result held in "row".
    rows = list(reader(urllib.urlopen(url)))

    # column 6 of the first data line (line 0 is the CSV header)
    current = float(rows[1][6])
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Write a copy of a vocabulary's RDF file enriched with metadata.

    The RDF file at rdf_vocab_properties['path'] is parsed, identifier,
    version, format and preferred-namespace statements are attached to its
    owl:Ontology subject, every term returned by get_terms gets an
    rdfs:isDefinedBy statement, and the result is written next to the
    original as "<name>_modified.rdf" in pretty RDF/XML.

    prefix               -- not used by this function
    vocab_properties     -- dict; 'preferredNamespaceUri' and
                            'preferredNamespacePrefix' are read
    rdf_vocab_properties -- dict with 'name', 'path' and 'uri' of the
                            source RDF file

    Returns (newrdf_vocab_properties, html_vocab_properties): dicts with
    'format', 'name', 'path' and 'uri' for the file written here and for
    the HTML rendering named "<name>.html". The HTML file itself is not
    created here.
    """
    source_name = rdf_vocab_properties['name']
    stem = os.path.splitext(source_name)[0]

    def sibling(new_name, mime_type):
        # same location as the source file, with the file name swapped
        return {
            'format': mime_type,
            'name': new_name,
            'path': rdf_vocab_properties['path'].replace(source_name, new_name),
            'uri': rdf_vocab_properties['uri'].replace(source_name, new_name),
        }

    html_vocab_properties = sibling("%s.html" % stem, 'text/html')
    newrdf_vocab_properties = sibling("%s_modified.rdf" % stem, 'application/rdf+xml')

    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])

    # the last owl:Ontology subject found is the one described
    subject = None
    for s in graph.subjects(namespaces['rdf']['type'], URIRef(namespaces['owl']['Ontology'])):
        subject = s

    # bind the well-known prefixes the file does not declare itself; the
    # loop variables must not reuse the name of the "prefix" parameter
    bound_urls = set(str(nsurl) for _nsprefix, nsurl in graph.namespaces())
    for ns_prefix, ns_url in namespaces.items():
        if str(ns_url) not in bound_urls:
            graph.bind(ns_prefix, URIRef(ns_url))

    rdf_uri = URIRef(rdf_vocab_properties['uri'])
    html_uri = URIRef(html_vocab_properties['uri'])
    namespace_uri = URIRef(vocab_properties['preferredNamespaceUri'])
    text_type = URIRef(namespaces['dctype']['Text'])
    imt_type = URIRef(namespaces['dcterms']['IMT'])

    #Add vocabulary properties identifier and format
    # Skipped when the file declares no owl:Ontology: there is then no
    # subject to attach the statements to.
    if subject is not None:
        graph.add((subject, namespaces['dc']['identifier'], rdf_uri))
        graph.add((subject, namespaces['dcterms']['isVersionOf'], namespace_uri))
        graph.add((subject, namespaces['dcterms']['hasFormat'], rdf_uri))
        graph.add((subject, namespaces['dcterms']['hasFormat'], html_uri))
        graph.add((subject, namespaces['vann']['preferredNamespaceUri'], namespace_uri))
        # NOTE(review): the prefix is stored as a URIRef although it looks
        # like a plain string - confirm whether a Literal is intended.
        graph.add((subject, namespaces['vann']['preferredNamespacePrefix'], URIRef(vocab_properties['preferredNamespacePrefix'])))

    # describe both renderings as dctype:Text with a blank-node format
    for document, mime_type, label in ((html_uri, 'text/html', 'HTML'),
                                       (rdf_uri, 'application/rdf+xml', 'RDF')):
        format_node = BNode()
        graph.add((document, namespaces['rdf']['type'], text_type))
        graph.add((document, namespaces['dc']['format'], format_node))
        graph.add((format_node, namespaces['rdf']['value'], Literal(mime_type)))
        graph.add((format_node, namespaces['rdfs']['label'], Literal(label)))
        graph.add((format_node, namespaces['rdf']['type'], imt_type))

    #Add rdfs:isDefinedBy for each class / property / term of the vocabulary
    for term in get_terms(rdf_vocab_properties['path']):
        graph.add((URIRef(term), namespaces['rdfs']['isDefinedBy'], namespace_uri))

    rdf_str = graph.serialize(format="pretty-xml")
    # closed even when the write fails
    with codecs.open(newrdf_vocab_properties['path'], 'w') as f:
        f.write(rdf_str)

    return (newrdf_vocab_properties, html_vocab_properties)
def __toRDF(self):
    """
    Dump mailing list into a RDF file

    Describes the list as a sioc:Forum with its title, description, host,
    address, date, generator, language, subscribers and messages, and
    writes the graph as pretty RDF/XML to '<dir>forum.rdf'. An IOError
    while writing is reported on standard output instead of being raised.
    """

    #rdf graph
    store = ConjunctiveGraph()

    #namespaces
    store.bind('rdfs', RDFS)
    store.bind('swaml', SWAML)
    store.bind('sioc', SIOC)
    store.bind('sioct', SIOCT)
    store.bind('foaf', FOAF)
    store.bind('dc', DC)
    store.bind('mvcb', MVCB)

    #first the host graph
    host = self.config.get('host')
    if host:
        self.__addSite(store, host)

    #and then the mailing list
    forum = URIRef(self.__getUri())
    store.add((forum, RDF.type, SIOC['Forum']))
    #store.add((forum, RDF.type, SIOCT['MailingList']))

    #list information
    title = self.config.get('title')
    if title:
        store.add((forum, DC['title'], Literal(title)))

    description = self.config.get('description')
    if description:
        store.add((forum, DC['description'], Literal(description)))

    if host:
        store.add((forum, SIOC['has_host'], URIRef(host)))

    store.add((forum, SWAML['address'], Literal(self.config.get('to'))))
    store.add((forum, DC['date'], Literal(FileDate(self.config.get('mbox')).getStringFormat())))
    store.add((forum, MVCB['generatorAgent'], URIRef(self.config.getAgent())))
    store.add((forum, MVCB['errorReportsTo'], URIRef('http://swaml.berlios.de/bugs')))
    if self.lang is not None:
        store.add((forum, DC['language'], Literal(self.lang)))

    #subscribers
    for uri in self.subscribers.getSubscribersUris():
        subscriber = URIRef(uri)
        store.add((forum, SIOC['has_subscriber'], subscriber))
        store.add((subscriber, RDF.type, SIOC['UserAccount']))

    #and all messages
    for msg in self.index.items:
        post = URIRef(msg.getUri())
        store.add((forum, SIOC['container_of'], post))
        store.add((post, RDF.type, SIOC['Post']))
        parent = msg.getParent()
        if parent is not None:
            store.add((post, SIOC['reply_of'], URIRef(parent)))

    #and dump to disk
    try:
        rdf_file = open(self.config.get('dir') + 'forum.rdf', 'w+')
        try:
            rdf_file.write(store.serialize(format="pretty-xml"))
            rdf_file.flush()
        finally:
            # release the handle even when writing fails
            rdf_file.close()
    except IOError as detail:
        print('Error exporting mailing list to RDF: ' + str(detail))
from rdflib.Graph import ConjunctiveGraph
from rdflib import Namespace, BNode, Literal, RDF, URIRef
import csv
import pysesame

# Vocabularies used for the job listings.
JOBS = Namespace("http://www.medev.ac.uk/interoperability/rss/1.0/modules/jobs/rss1.0jobsmodule#")
DC = Namespace("http://purl.org/dc/elements/1.1/")
JB = Namespace("http://semprog.com/schemas/jobboard#")
COMPANY = Namespace("http://purl.org/rss/1.0/modules/company/")
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#')

jg = ConjunctiveGraph()
jg.bind('jobs', JOBS)
jg.bind('dc', DC)
jg.bind('jobboard', JB)
jg.bind('company', COMPANY)
jg.bind('rdfs', RDFS)

# Incremental counter for vacancy IDs
vid = 0

# One vacancy per line of joblist.csv.
for title, salary, location, company, crunchbase, ticker in csv.reader(open('joblist.csv')):
    # Create the vacancy
    vid = vid + 1
    vacancy = JB[str(vid)]
    jg.add((vacancy, RDF.type, JOBS['Vacancy']))
    jg.add((vacancy, DC['title'], Literal(title)))

    # The location node is named after the lower-cased location, with
    # spaces turned into underscores and commas dropped.
    location_id = location.lower().replace(' ', '_').replace(',', '')
    jg.add((vacancy, JB['location'], JB[location_id]))
    jg.add((JB[location_id], DC['title'], Literal(location)))
_hdlr = logging.StreamHandler() _hdlr.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s')) _logger.addHandler(_hdlr) from rdflib.Graph import ConjunctiveGraph as Graph from rdflib import plugin from rdflib.store import Store from rdflib import Namespace from rdflib import Literal from rdflib import URIRef from rdflib.sparql.bison import Parse store = Graph() # Bind a few prefix, namespace pairs. store.bind("dc", "http://http://purl.org/dc/elements/1.1/") store.bind("foaf", "http://xmlns.com/foaf/0.1/") # Create a namespace object for the Friend of a friend namespace. foaf = Namespace("http://xmlns.com/foaf/0.1/") dc = Namespace('http://purl.org/dc/elements/1.1/') rdf = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') rdfs = Namespace('http://www.w3.org/2000/01/rdf-schema#') owl = Namespace('http://www.w3.org/2002/07/owl#') vs = Namespace('http://www.w3.org/2003/06/sw-vocab-status/ns#') classranges = {} classdomains = {} def termlink(string):
from rdflib.Graph import ConjunctiveGraph
from rdflib import Namespace, BNode, Literal, RDF, URIRef
import csv
import pysesame

JB = Namespace("http://semprog.com/schemas/jobboard#")
GEO = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')

lg = ConjunctiveGraph()
lg.bind('geo', GEO)

# Read the city coordinates inside a ``with`` block so the file handle is
# always released.  The longitude is named ``lon`` so the Python 2 builtin
# ``long`` is no longer shadowed at module level.
with open('city_locations.csv', 'U') as city_file:
    for city, lat, lon in csv.reader(city_file):
        city_node = JB[city]
        lg.add((city_node, GEO['lat'], Literal(float(lat))))
        lg.add((city_node, GEO['long'], Literal(float(lon))))

# Serialize the graph as RDF/XML and echo it before uploading.
data = lg.serialize(format='xml')
print(data)

# Post the serialized graph to the 'joblistings' Sesame repository and
# print whatever the server call returns.
c = pysesame.connection('http://semprog.com:8280/openrdf-sesame/')
c.use_repository('joblistings')
print(c.postdata(data))
from rdflib import Namespace, BNode, Literal, RDF, URIRef from pysesame import connection import urllib from simplejson import loads # Connecion to Sesame con=connection('http://freerisk.org:8280/openrdf-sesame/') con.use_repository('joblistings') con.addnamespace('rdfs','http://www.w3.org/2000/01/rdf-schema#') JB = Namespace("http://semprog.com/schemas/jobboard#") DC = Namespace("http://purl.org/dc/elements/1.1/") FOAF = Namespace("http://xmlns.com/foaf/0.1/") cg=ConjunctiveGraph() cg.bind('dc',DC) cg.bind('jobboard',JB) cg.bind('foaf',FOAF) # Find seeAlso URLs containing Crunchbase res=con.query('select ?id ?url where {?id rdfs:seeAlso ?url . FILTER regex(?url, "crunchbase")}') # Loop over the results for row in res: company=URIRef(row['id']['value']) url=row['url']['value'] data=urllib.urlopen(url).read() record=loads(data) # Add company locations for loc in record['offices']:
from construct.proxy import proxy
from construct.constructservice import ServiceError
from rdflib import RDF, Namespace, Literal
from rdflib.Graph import ConjunctiveGraph

FOAF = Namespace("http://xmlns.com/foaf/0.1/")
exampleNS = Namespace("http://www.example.com/")

# Instantiate the proxy; from here on the name ``proxy`` refers to the
# instance rather than the imported class.
proxy = proxy()

print("Executing Script")

try:
    # Describe one person with a handful of FOAF statements.
    store = ConjunctiveGraph()
    store.bind("foaf", "http://xmlns.com/foaf/0.1/")
    joe = exampleNS["~joebloggs"]
    for predicate, value in (
        (RDF.type, FOAF["Person"]),
        (FOAF["name"], Literal("Joe Bloggs")),
        (FOAF["nick"], Literal("joe")),
        (FOAF["givenname"], Literal("Joe")),
        (FOAF["family_name"], Literal("Bloggs")),
    ):
        store.add((joe, predicate, value))

    data = store.serialize(format="nt")

    # Hand the N-Triples document to the data store and report the outcome;
    # the payload is echoed in both cases.
    if proxy.insert(data):
        outcome = "The following data were added correctly:"
    else:
        outcome = "Problem encountered when adding the following data:"
    print(outcome)
    print(data)
except ServiceError as e:
    print(e)
def __toRDF(self):
    """
    Build the RDF description of all subscribers.

    Makes sure the configured output directory exists, then fills a
    ConjunctiveGraph with one sioc:UserAccount node per subscriber: name,
    hashed e-mail, optional FOAF links, coordinates, avatar, homepage and
    the URIs of the mails the subscriber sent.

    A UnicodeDecodeError while describing a subscriber is reported on
    stdout and the remaining profile triples of that subscriber are
    skipped; the sent mails are still added.
    """
    output_dir = self.config.get('dir')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # rdf graph
    store = ConjunctiveGraph()

    # namespaces
    store.bind('sioc', SIOC)
    store.bind('foaf', FOAF)
    store.bind('rdfs', RDFS)

    # Every account is attached to the same forum resource.
    forum = URIRef(self.config.get('base') + 'forum')

    count = 0

    # a node for each subscriber
    for mail, subscriber in self.subscribers.items():
        count += 1
        user = URIRef(subscriber.getUri())
        store.add((user, RDF.type, SIOC['UserAccount']))
        store.add((user, SIOC['subscriber_of'], forum))

        try:
            name = subscriber.getName()
            if name:
                store.add((user, SIOC['name'], Literal(name)))
            store.add((user, SIOC['email_sha1'], Literal(subscriber.getShaMail())))

            if self.config.get('foaf'):
                foafDoc, foafUri = subscriber.getFoaf()
                if foafDoc is not None:
                    store.add((user, RDFS['seeAlso'], URIRef(foafDoc)))
                if foafUri is not None:
                    store.add((user, SIOC['account_of'], URIRef(foafUri)))

            # NOTE(review): the nesting of the three sections below relative
            # to the FOAF check above is assumed; each one is guarded by its
            # own None test either way -- confirm against the intended layout.

            # coordinates
            lat, lon = subscriber.getGeo()
            if lat is not None and lon is not None:
                store.bind('geo', GEO)
                geo = BNode()
                store.add((user, FOAF['based_near'], geo))
                store.add((geo, RDF.type, GEO['Point']))
                store.add((geo, GEO['lat'], Literal(lat)))
                store.add((geo, GEO['long'], Literal(lon)))

            # depiction
            pic = subscriber.getPic()
            if pic is not None:
                store.add((user, SIOC['avatar'], URIRef(pic)))

            # homepage -- guarded by the homepage itself; the previous code
            # tested ``pic`` here, which dropped homepages of subscribers
            # without a picture and built URIRef(None) for the opposite case.
            homepage = subscriber.getHomepage()
            if homepage is not None:
                store.add((user, FOAF['homepage'], URIRef(homepage)))

        except UnicodeDecodeError as detail:
            print('Error proccesing subscriber ' + subscriber.getName() + ': ' + str(detail))

        # Link the account to every mail it authored.
        for uri in subscriber.getSentMails():
            store.add((user, SIOC['creator_of'], URIRef(uri)))
def schemafy(html_file):
    """Extract RDF from RDFa-annotated [html_file]; return a L{Graph}
    containing the RDF.

    The namespaces listed below are bound on the graph before parsing, and
    remove_assertions() is applied to the result before it is returned.
    """

    # create an empty graph and bind some namespaces
    store = Graph()
    for prefix, namespace in (
        ("cc", "http://creativecommons.org/ns#"),
        ("dc", "http://purl.org/dc/elements/1.1/"),
        ("dcq", "http://purl.org/dc/terms/"),
        ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("xsd", "http://www.w3.org/2001/XMLSchema-datatypes#"),
        ("owl", "http://www.w3.org/2002/07/owl#"),
        ("xhtml", "http://www.w3.org/1999/xhtml/vocab#"),
    ):
        store.bind(prefix, namespace)

    # parse the source document; the ``with`` block closes the file handle,
    # which the previous bare ``file(html_file)`` argument never did
    parser = rdfadict.RdfaParser()
    with open(html_file) as source:
        parser.parse_file(source, "http://creativecommons.org/ns",
                          sink=GraphSink(store))

    # remove undesirable assertions
    remove_assertions(store)

    return store
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Annotate a vocabulary's RDF file and write it out as '<name>_modified.rdf'.

    The graph at rdf_vocab_properties['path'] is parsed, every namespace
    from the module-level ``namespaces`` mapping that is not yet bound is
    bound, and triples describing the vocabulary (identifier, formats,
    preferred namespace) are added to the subject typed owl:Ontology.
    Every term returned by get_terms() receives an rdfs:isDefinedBy link to
    the preferred namespace URI.  The result is serialized as pretty-xml.

    prefix -- not used by this function.
    vocab_properties -- mapping read for 'preferredNamespaceUri' and
        'preferredNamespacePrefix'.
    rdf_vocab_properties -- mapping read for 'name', 'path' and 'uri' of
        the source RDF file.

    Returns the tuple (newrdf_vocab_properties, html_vocab_properties):
    dicts with 'format', 'name', 'path' and 'uri' for the written RDF file
    and for the HTML rendition named after the same stem.
    """
    source_name = rdf_vocab_properties['name']
    stem = os.path.splitext(source_name)[0]

    def _sibling_properties(new_name, mime_type):
        # Same path and URI as the source file, with the file name swapped.
        return {
            'format': mime_type,
            'name': new_name,
            'path': rdf_vocab_properties['path'].replace(source_name, new_name),
            'uri': rdf_vocab_properties['uri'].replace(source_name, new_name),
        }

    html_vocab_properties = _sibling_properties("%s.html" % stem, 'text/html')
    newrdf_vocab_properties = _sibling_properties(
        "%s_modified.rdf" % stem, 'application/rdf+xml')

    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])

    # The vocabulary node is the subject typed owl:Ontology; when several
    # exist the last one iterated wins.
    # NOTE(review): ``subject`` stays None when the file declares no
    # owl:Ontology, and the triples below are then added with a None subject.
    subject = None
    for s in graph.subjects(namespaces['rdf']['type'], URIRef(namespaces['owl']['Ontology'])):
        subject = s

    # Bind the well-known namespaces the source graph does not declare yet.
    # Distinct loop names keep the ``prefix`` parameter from being clobbered.
    bound_urls = set(str(nsurl) for nsprefix, nsurl in graph.namespaces())
    for ns_prefix, ns_url in namespaces.iteritems():
        if str(ns_url) not in bound_urls:
            graph.bind(ns_prefix, URIRef(ns_url))

    formatNode1 = BNode()
    formatNode2 = BNode()

    html_uri = URIRef(html_vocab_properties['uri'])
    rdf_uri = URIRef(rdf_vocab_properties['uri'])
    namespace_uri = URIRef(vocab_properties['preferredNamespaceUri'])

    # Add vocabulary properties identifier and format
    graph.add((subject, namespaces['dc']['identifier'], rdf_uri))
    graph.add((subject, namespaces['dcterms']['isVersionOf'], namespace_uri))
    graph.add((subject, namespaces['dcterms']['hasFormat'], rdf_uri))
    graph.add((subject, namespaces['dcterms']['hasFormat'], html_uri))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'], namespace_uri))
    # NOTE(review): the prefix is stored as a URIRef although it looks like a
    # plain string; kept as-is so the generated RDF does not change -- confirm.
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'],
               URIRef(vocab_properties['preferredNamespacePrefix'])))

    # Describe the HTML rendition and its media type.
    graph.add((html_uri, namespaces['rdf']['type'], URIRef(namespaces['dctype']['Text'])))
    graph.add((html_uri, namespaces['dc']['format'], formatNode1))
    graph.add((formatNode1, namespaces['rdf']['value'], Literal('text/html')))
    graph.add((formatNode1, namespaces['rdfs']['label'], Literal('HTML')))
    graph.add((formatNode1, namespaces['rdf']['type'], URIRef(namespaces['dcterms']['IMT'])))

    # Describe the RDF rendition and its media type.
    graph.add((rdf_uri, namespaces['rdf']['type'], URIRef(namespaces['dctype']['Text'])))
    graph.add((rdf_uri, namespaces['dc']['format'], formatNode2))
    graph.add((formatNode2, namespaces['rdf']['value'], Literal('application/rdf+xml')))
    graph.add((formatNode2, namespaces['rdfs']['label'], Literal('RDF')))
    graph.add((formatNode2, namespaces['rdf']['type'], URIRef(namespaces['dcterms']['IMT'])))

    # Add rdfs:isDefinedBy for each class / property / term of the vocabulary
    for term in get_terms(rdf_vocab_properties['path']):
        graph.add((URIRef(term), namespaces['rdfs']['isDefinedBy'], namespace_uri))

    rdf_str = graph.serialize(format="pretty-xml")

    # ``with`` guarantees the output file is closed even if the write fails.
    with codecs.open(newrdf_vocab_properties['path'], 'w') as f:
        f.write(rdf_str)

    return (newrdf_vocab_properties, html_vocab_properties)
class rdf_transform:
    """Accumulates RDF about letters, correspondents, publications, places
    and the author in one in-memory graph and serializes it as pretty-xml.

    Every create_* method adds triples to the shared graph ``self.g`` and
    returns the serialization of the whole graph, so output from earlier
    calls on the same instance is included in later results.
    """

    # Short titles found in letters, mapped to the full title of the work.
    _FULL_TITLES = {
        "Copperfield": "David Copperfield",
        "Nickleby": "Nicholas Nickleby",
        "Edwin Drood": "The Mystery of Edwin Drood",
        "Dombey": "Dombey and Son",
        "Tale of Two Cities": "A Tale of Two Cities",
        "Christmas Carol": "A Christmas Carol",
    }

    def __init__(self):
        self.g = Graph('IOMemory')
        self.g.bind('dc', dublin_core)
        self.g.bind('foaf', FOAF)
        self.g.bind('time-entry', owl_time)
        self.g.bind('letter', letter_ns)
        self.g.bind('owl', owl)
        self.g.bind('ex', exam)
        self.g.bind('geo', geo)
        self.g.bind('base', base_uri)

    def create_rdf_letter(self, letters):
        ''' creates an rdf representation of letter used to load into the triple store '''
        magazine_titles = ("ALL THE YEAR ROUND", "HOUSEHOLD WORDS", "Household Words")

        for l in letters:
            correspondence = (base_uri + "letters/resource/" + l.type + '/'
                              + urllib.quote(l.correspondent) + '/' + str(l.id) + '/rdf')
            self.add_author(correspondence, "Charles Dickens")
            self.add_subject(correspondence, "letter")
            self.add_time(correspondence, str(l.letter_date) + 'T00:00:00')
            self.add_correspondent(correspondence, l.correspondent)

            place = ''
            try:
                place = str(l.letter_place)
            # unicode errors are text related
            except UnicodeError:
                pass

            # Compare by value: ``is not ''`` only tested object identity.
            if place != '':
                self.add_place(correspondence, place)

            self.add_letter_text(correspondence, l.letter_text)
            self.add_salutation(correspondence, l.correspondent, l.salutation)

            # Quoted passages that start with a capital and hold no "!" are
            # treated as references to a magazine or another text.
            letter_quotes = parse_text.parse_balanced_quotes(l.letter_text)
            for quote in letter_quotes:
                # isupper() is now actually called; the bare attribute was
                # always truthy, so the capital-letter test never filtered.
                if quote[0:1].isupper() and "!" not in quote:
                    if quote in magazine_titles:
                        self.add_magazine(correspondence, parse_text.stripPunc(quote))
                    else:
                        self.add_text(correspondence, parse_text.stripPunc(quote))

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def create_rdf_end(self):
        ''' function to create an endpoint in rdf/xml '''
        letter_items = dbase.get_endpoint_rdf().items()
        letter_items.sort()

        works = dbase.get_books()

        # TODO: Normalise the text to reduce code repetition
        periodicals = set(['All The Year Round', 'Household Words', 'The Daily News'])

        for url, text in letter_items:
            # Reset per-letter state so a swallowed UnicodeError can neither
            # reuse values from the previous letter nor hit unbound names.
            correspondence = None
            place = None
            letter_text = None
            try:
                correspondence = (base_uri + "letters/resource/dickens/"
                                  + urllib.quote(str(text[1])) + '/' + str(url) + '/rdf')
                self.add_author(correspondence, "Charles Dickens")
                self.add_subject(correspondence, "letter")
                self.add_subject(correspondence, "Charles Dickens")
                self.add_subject(correspondence, parse_text.camel_case(str(text[1])))
                self.add_time(correspondence, str(text[3]) + 'T00:00:00')
                self.add_correspondent(correspondence, str(text[1]))
                self.add_salutation(correspondence, urllib.quote(str(text[1])), str(text[4]))
                # Test the raw value: after str() the None check below could
                # never fail and a literal 'None' place would be recorded.
                if text[5] is not None:
                    place = str(text[5])
                letter_text = str(text[2])
            # unicode errors are text related
            except UnicodeError:
                pass

            if correspondence is None:
                # The letter URI itself could not be built; nothing to attach.
                continue

            if place is not None:
                self.add_place(correspondence, place)
            if letter_text is not None:
                self.add_letter_text(correspondence, letter_text)

            letter_quotes = parse_text.parse_balanced_quotes(text[2])
            for quote in letter_quotes:
                work = parse_text.stripPunc(quote)

                if quote in periodicals:
                    self.add_magazine(correspondence, quote)

                if work in works:
                    self.add_text(correspondence, self._FULL_TITLES.get(work, work))

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def create_correspondent(self, corr, letter_items):
        ''' function to describe a correspondent and the salutations used for them '''
        u_corr = unicode(corr)

        correspondence = base_uri + "correspondent/resource/" + urllib.quote(corr)
        self.add_subject(correspondence, "correspondent")

        for url, text in letter_items:
            # ``and``: the previous ``or`` was true for every value, so None
            # and empty salutations were added as well.
            if url is not None and url != '':
                self.add_salutation(correspondence, corr, str(url))

        # need rules to define relationships - family, authors
        # NOTE(review): assumed to run once per call rather than once per
        # letter; the added triples are identical either way -- confirm.
        if u_corr == "Miss Hogarth":
            self.add_subject(correspondence, "daughter")
            self.add_daughter(correspondence, "Charles Dickens")
            self.add_sameas(correspondence, "http://dbpedia.org/page/Georgina_Hogarth")

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def create_publication(self, title, type):
        ''' function to describe a publication (e.g. a book) in rdf '''
        start = ''
        end = ''
        abstract = ''
        uri_str = ''
        source = ''

        book_items = dbase.get_book_rdf(title).items()
        book_items.sort()

        # NOTE(review): the record sorted last supplies the values used
        # below; this assumes the description is built once, after the loop.
        for u, book in book_items:
            title = u
            start = book[0]
            end = book[1]
            abstract = book[2]
            uri_str = book[3]
            source = book[4]

        correspondence = base_uri + type + "/resource/" + title.strip().replace(" ", "_")
        self.add_subject(correspondence, type)
        self.add_subject(correspondence, "Charles Dickens")
        self.add_author(correspondence, "Charles Dickens")
        self.add_time(correspondence, start)
        self.add_time(correspondence, end)
        self.add_title(correspondence, title)
        self.add_abstract(correspondence, abstract)

        uri = u"http://dbpedia.org/page/" + uri_str
        self.add_sameas(correspondence, uri)

        if type == "book":
            source_uri = "http://gutenberg.org/ebooks/" + source
            self.add_sameas(correspondence, source_uri)

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def create_place(self, placeobj):
        ''' function to describe a place (name, coordinates, source) in rdf '''
        (lat, lon, place_name, source) = ('', '', '', '')

        # NOTE(review): as in create_publication, the last location iterated
        # is assumed to be the one described.
        for location in placeobj:
            place_name = location.placeid
            lat = location.latitude
            lon = location.longitude
            source = location.source

        correspondence = base_uri + "place/resource/" + urllib.quote(place_name) + "/rdf"
        self.add_latitude(correspondence, lat)
        self.add_longitude(correspondence, lon)
        self.add_place_name(correspondence, place_name)
        self.add_sameas(correspondence, source)

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def create_author(self, data):
        ''' function to return an author graph in rdf '''
        # ``data`` is accepted for interface compatibility but not read.
        author = u"Charles Dickens"
        born = u"1812-02-07"
        died = u"1870-06-09"
        abstract = (
            u"Charles John Huffam Dickens, pen-name 'Boz', was the most popular English novelist of the Victorian era, and one of the most popular of all time, responsible for some of English literature's most iconic characters. "
            u"Many of his novels, with their recurrent theme of social reform, first appeared in periodicals and magazines in serialised form, a popular format for fiction at the time. "
            u"Unlike other authors who completed entire novels before serial production began, Dickens often wrote them while they were being serialized, creating them in the order in which they were meant to appear. "
            u"The practice lent his stories a particular rhythm, punctuated by one 'cliffhanger' after another to keep the public looking forward to the next installment. "
            u"The continuing popularity of his novels and short stories is such that they have never gone out of print. "
            u"His work has been praised for its mastery of prose and unique personalities by writers such as George Gissing and G. K. Chesterton, though the same characteristics prompted others, such as Henry James and Virginia Woolf, to criticize him for sentimentality and implausibility."
        )
        author_url = u"http://en.wikipedia.org/wiki/Charles_Dickens"

        correspondence = base_uri + "author/resource/" + author
        self.add_subject(correspondence, "Charles Dickens")
        self.add_subject(correspondence, "author")
        self.add_nick(correspondence, "Boz")
        self.add_time(correspondence, born)
        self.add_time(correspondence, died)
        self.add_abstract(correspondence, abstract)
        self.add_sameas(correspondence, author_url)

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def add_author(self, correspondence, name):
        ''' function to add author to graph '''
        dc_author = urllib.quote(name)
        # Build the full URI first so the object is a URIRef rather than the
        # result of concatenating a URIRef with a plain string.
        lauthor = URIRef(base_uri + 'author/resource/%s' % dc_author + "/rdf")
        self.g.add((correspondence, dublin_core['creator'], lauthor))
        return lauthor

    def add_salutation(self, correspondence, author, name):
        ''' function to add salutation to graph '''
        nameid = urllib.quote(author)
        person = URIRef(base_uri + 'correspondent/resource/%s' % nameid + "/rdf")
        # The salutation is stored as a foaf:nick on the subject itself; the
        # correspondent URI is only returned to the caller.
        self.g.add((correspondence, FOAF['nick'], Literal(name)))
        return person

    def add_correspondent(self, correspondence, name):
        ''' function to add correspondent to graph '''
        nameid = urllib.quote(name)
        person = URIRef(base_uri + 'correspondent/resource/%s' % nameid + "/rdf")
        self.g.add((correspondence, letter_ns["correspondent"], person))
        return person

    def add_magazine(self, correspondence, name):
        ''' function to add magazine to graph '''
        nameid = urllib.quote(name)
        magazine = URIRef(base_uri + 'magazine/resource/%s' % nameid + "/rdf")
        self.g.add((correspondence, letter_ns['textReferred'], magazine))
        return magazine

    def add_text(self, correspondence, textname):
        ''' function to add referred text to the graph'''
        textid = (base_uri + "book/resource/"
                  + textname.replace("\n", "_").replace(" ", "_") + "/rdf")
        return self.g.add((correspondence, letter_ns['textReferred'], URIRef(textid)))

    def add_author_text(self, correspondence, textname):
        ''' function to add author referred text to the graph'''
        # Returns the result of Graph.add like the sibling helpers; the
        # previous ``return book`` named an undefined variable and raised
        # NameError on every call.
        return self.g.add(
            (correspondence, letter_ns['textAuthorReferred'], Literal(textname)))

    def add_subject(self, correspondence, subject):
        ''' function to add a dc:subject literal '''
        return self.g.add((correspondence, dublin_core['subject'], Literal(subject)))

    def add_sameas(self, correspondence, link):
        ''' function to add an owl:sameAs link '''
        return self.g.add((correspondence, owl['sameAs'], URIRef(link)))

    def add_time(self, correspondence, time):
        ''' function to add time '''
        return self.g.add((correspondence, dublin_core['date'], Literal(str(time))))

    def add_title(self, correspondence, title):
        ''' function to add a dc:title literal '''
        return self.g.add((correspondence, dublin_core['title'], Literal(title)))

    def add_nick(self, correspondence, nick):
        ''' function to add a foaf:nick literal '''
        return self.g.add((correspondence, FOAF['nick'], Literal(nick)))

    def add_place(self, correspondence, place):
        ''' function to link the subject to a place resource '''
        # This is the definition that was effective before: an earlier
        # duplicate was shadowed by it and referenced an undefined ``time``.
        # NOTE(review): the link uses dc:title as predicate -- confirm that
        # this is the intended property for a place reference.
        return self.g.add((correspondence, dublin_core['title'],
                           URIRef(base_uri + "place/resource/" + urllib.quote(place) + "/rdf")))

    def add_daughter(self, correspondence, author):
        ''' function to add the daughter relationship to an author resource '''
        return self.g.add(
            (correspondence, exam['daughter'],
             URIRef(base_uri + "author/resource/" + author + "/rdf")))

    def add_letter_text(self, correspondence, letter_text):
        ''' function to add the text of a letter '''
        return self.g.add((correspondence, letter_ns['Text'], Literal(letter_text)))

    def add_longitude(self, correspondence, long):
        ''' function to add a geo:long literal '''
        return self.g.add((correspondence, geo['long'], Literal(long)))

    def add_latitude(self, correspondence, lat):
        ''' function to add a geo:lat literal '''
        return self.g.add((correspondence, geo['lat'], Literal(lat)))

    def add_description(self, correspondence, abstract):
        ''' function to add a geo:desc literal '''
        return self.g.add((correspondence, geo['desc'], Literal(abstract)))

    def add_place_name(self, correspondence, name):
        ''' function to add a geo:name literal '''
        return self.g.add((correspondence, geo['name'], Literal(name)))

    def add_abstract(self, correspondence, letters):
        ''' function to add an abstract, stored under letter:text '''
        return self.g.add((correspondence, letter_ns['text'], Literal(letters)))

    def add_open(self, correspondence, letters):
        ''' function to add the opening of a letter '''
        return self.g.add((correspondence, letter_ns['open'], Literal(letters)))

    def add_close(self, correspondence, letters):
        ''' function to add the close of a letter '''
        return self.g.add((correspondence, letter_ns['close'], Literal(letters)))
def schemafy(html_file):
    """Extract RDF from RDFa-annotated [html_file]; return a L{Graph}
    containing the RDF.

    The namespaces listed below are bound on the graph before parsing, and
    remove_assertions() is applied to the result before it is returned.
    """

    # create an empty graph and bind some namespaces
    store = Graph()
    for prefix, namespace in (
        ("cc", "http://creativecommons.org/ns#"),
        ("dc", "http://purl.org/dc/elements/1.1/"),
        ("dcq", "http://purl.org/dc/terms/"),
        ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("xsd", "http://www.w3.org/2001/XMLSchema-datatypes#"),
        ("owl", "http://www.w3.org/2002/07/owl#"),
        ("xhtml", "http://www.w3.org/1999/xhtml/vocab#"),
    ):
        store.bind(prefix, namespace)

    # parse the source document; the ``with`` block closes the file handle,
    # which the previous bare ``file(html_file)`` argument never did
    parser = rdfadict.RdfaParser()
    with open(html_file) as source:
        parser.parse_file(source, "http://creativecommons.org/ns",
                          sink=GraphSink(store))

    # remove undesirable assertions
    remove_assertions(store)

    return store
from rdflib.Graph import ConjunctiveGraph
from rdflib import Namespace, BNode, Literal, RDF, URIRef
import csv
import pysesame

JB = Namespace("http://semprog.com/schemas/jobboard#")
GEO = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')

lg = ConjunctiveGraph()
lg.bind('geo', GEO)

# Read the city coordinates inside a ``with`` block so the file handle is
# always released.  The longitude is named ``lon`` so the Python 2 builtin
# ``long`` is no longer shadowed at module level.
with open('city_locations.csv', 'U') as city_file:
    for city, lat, lon in csv.reader(city_file):
        city_node = JB[city]
        lg.add((city_node, GEO['lat'], Literal(float(lat))))
        lg.add((city_node, GEO['long'], Literal(float(lon))))

# Serialize the graph as RDF/XML and echo it before uploading.
data = lg.serialize(format='xml')
print(data)

# Post the serialized graph to the 'joblistings' Sesame repository and
# print whatever the server call returns.
c = pysesame.connection('http://semprog.com:8280/openrdf-sesame/')
c.use_repository('joblistings')
print(c.postdata(data))
from rdflib import Namespace, BNode, Literal, RDF, URIRef
# ConjunctiveGraph is used below but was never imported, so the script
# stopped with a NameError before issuing its query.
from rdflib.Graph import ConjunctiveGraph
from pysesame import connection
from contextlib import closing
import urllib
from simplejson import loads

# Connection to Sesame
con = connection('http://freerisk.org:8280/openrdf-sesame/')
con.use_repository('joblistings')
con.addnamespace('rdfs', 'http://www.w3.org/2000/01/rdf-schema#')

JB = Namespace("http://semprog.com/schemas/jobboard#")
DC = Namespace("http://purl.org/dc/elements/1.1/")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

cg = ConjunctiveGraph()
cg.bind('dc', DC)
cg.bind('jobboard', JB)
cg.bind('foaf', FOAF)

# Find seeAlso URLs containing Crunchbase
res = con.query(
    'select ?id ?url where {?id rdfs:seeAlso ?url . FILTER regex(?url, "crunchbase")}'
)

# Loop over the results
for row in res:
    company = URIRef(row['id']['value'])
    url = row['url']['value']
    # closing() releases the HTTP response once the body has been read;
    # the response object was previously left open for every row.
    with closing(urllib.urlopen(url)) as response:
        data = response.read()
    record = loads(data)
from rdflib.Graph import ConjunctiveGraph
from rdflib import Namespace, BNode, Literal, RDF, URIRef
import csv
import pysesame

# Vocabularies used to describe the job listings.
JOBS = Namespace(
    "http://www.medev.ac.uk/interoperability/rss/1.0/modules/jobs/rss1.0jobsmodule#"
)
DC = Namespace("http://purl.org/dc/elements/1.1/")
JB = Namespace("http://semprog.com/schemas/jobboard#")
COMPANY = Namespace("http://purl.org/rss/1.0/modules/company/")
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#')

# Graph that collects every vacancy, with readable prefixes for serialization.
jg = ConjunctiveGraph()
jg.bind('jobs', JOBS)
jg.bind('dc', DC)
jg.bind('jobboard', JB)
jg.bind('company', COMPANY)
jg.bind('rdfs', RDFS)

# Incremental counter for vacancy IDs
vid = 0

# The CSV is opened in a ``with`` block so the handle is closed even if a
# malformed row aborts the loop (the bare ``file(...)`` call never closed it).
with open('joblist.csv') as joblist:
    for title, salary, location, company, crunchbase, ticker in csv.reader(joblist):
        # Create the vacancy
        vid += 1
        vacancy = JB[str(vid)]
        jg.add((vacancy, RDF.type, JOBS['Vacancy']))
        jg.add((vacancy, DC['title'], Literal(title)))
#this works too #store = MySQL(identifier,configuration) #use rdflib to create schema for instance, that is, create doens't "create the db, rather, it creates the schema for the db store.open(configString, create=False) #print store.identifier ################### # a ConjunctiveGraph ################### conjgraph = ConjunctiveGraph(store=store,identifier=URIRef('http://purl.org/linguistics/gold')) data = Namespace('http://purl.org/linguistics/data/') conjgraph.bind('data',data) #probably no triples should be added the conjgraph #conjgraph.add((data['lingsign123'], data['orthographicRep'], Literal('dog'))) #conjgraph.add((data['lingsign456'], data['orthographicRep'], Literal('cat'))) #conjgraph.add((data['lingsign789'], data['orthographicRep'], Literal('fish'))) #print conjgraph.default_context conjgraph.commit() """ ############## # Plain Graph1 ################ """