def serialize(self, add, delete):
    """Serialize a changeset as an eccenca revision (eccrev) commit in TriG.

    Parameters
    ----------
    add, delete : dict
        Mapping of graph URI -> iterable of (s, p, o) triples that were
        added / removed in that graph.

    Returns
    -------
    str
        A single urn:commit resource with one urn:revision per touched
        graph, serialized as TriG.
    """
    commit = Namespace("urn:commit:" + str(uuid.uuid1()) + ":")
    eccrev = Namespace("https://vocab.eccenca.com/revision/")
    g = ConjunctiveGraph()
    namespace_manager = NamespaceManager(g)
    namespace_manager.bind('eccrev', eccrev, override=False)
    g.add((commit.term(""), RDF.type, eccrev.Commit))
    # Visit every graph mentioned in either half of the diff.
    for graphUri in set(delete) | set(add):
        deleted = delete.get(graphUri)
        added = add.get(graphUri)
        if not deleted and not added:
            continue  # nothing changed in this graph
        revision = Namespace("urn:revision:" + str(uuid.uuid1()) + ":")
        g.add((commit.term(""), eccrev.hasRevision, revision.term("")))
        g.add((revision.term(""), RDF.type, eccrev.Revision))
        # 'http://quitdiff.default/' is the placeholder for the default
        # graph; only real named graphs get an explicit revision-graph link.
        if str(graphUri) != 'http://quitdiff.default/':
            g.add((revision.term(""), eccrev.hasRevisionGraph, graphUri))
        if deleted:
            deleteGraphName = revision.term(":delete")
            g.add((revision.term(""), eccrev.deltaDelete, deleteGraphName))
            for triple in deleted:
                g.add(triple + (deleteGraphName,))
        if added:
            insertGraphName = revision.term(":insert")
            g.add((revision.term(""), eccrev.deltaInsert, insertGraphName))
            for triple in added:
                g.add(triple + (insertGraphName,))
    return g.serialize(format="trig").decode("utf-8")
def make_component_hierarchy(component_map, component_part_map):
    """Build an RDF graph of INDRA location components.

    Every component gets a hasName triple; components listed in
    component_part_map additionally get partof links to their parents.
    """
    indra_ns = 'http://sorger.med.harvard.edu/indra/'
    locations = Namespace(indra_ns + 'locations/')
    relations = Namespace(indra_ns + 'relations/')
    part_of = relations.term('partof')
    has_name = relations.term('hasName')
    graph = rdflib.Graph()
    for comp_id, comp_name in component_map.items():
        graph.add((locations.term(comp_id), has_name, Literal(comp_name)))
        for sup_id in component_part_map.get(comp_id) or []:
            graph.add((locations.term(comp_id), part_of,
                       locations.term(sup_id)))
    return graph
def make_component_hierarchy(component_map, component_part_map):
    """Build an RDF graph of INDRA entity components.

    Every component gets a hasName triple; components listed in
    component_part_map additionally get partof links to their parents.
    """
    indra_ns = 'http://sorger.med.harvard.edu/indra/'
    entities = Namespace(indra_ns + 'entities/')
    relations = Namespace(indra_ns + 'relations/')
    part_of = relations.term('partof')
    has_name = relations.term('hasName')
    graph = rdflib.Graph()
    for comp_id, comp_name in component_map.items():
        graph.add((entities.term(comp_id), has_name, Literal(comp_name)))
        for sup_id in component_part_map.get(comp_id) or []:
            graph.add((entities.term(comp_id), part_of,
                       entities.term(sup_id)))
    return graph
def export(self, entities, entity_namespace, ontology_namespace, export_language):
    """Export *entities* as RDF serialized in ``export_language``.

    Each entity is minted a fresh UUID-based URI inside
    ``entity_namespace``, typed via the class's CIDOC-CRM type mapping,
    and labelled (French) with its name.
    """
    ens = Namespace(entity_namespace)
    ons = Namespace(ontology_namespace)
    # Name-mangled class attribute; resolved because this method lives
    # inside CIDOCCRMExporter.
    type_map = CIDOCCRMExporter.__entity_type_to_ontlogy_type
    graph = Graph()
    for entity in entities:
        subject = ens.term(str(uuid.uuid4()))
        graph.add((subject, RDF.type, ons.term(type_map[entity.entity_type])))
        graph.add((subject, RDFS.label, Literal(entity.name, lang='fr')))
    return graph.serialize(format=export_language).decode('utf-8')
def create_ontology(self, tr, predicate, subClass, address, booktitle):
    """Build and persist the JFPO ontology for one term/book record.

    Side effects: writes two Turtle files —
    'trtst.rdf' (the property schema derived from ``self.symbols``) and
    'test2.owl' (the subject graph linking ``tr`` to its subclass, source
    address and book title).
    """
    LDT = Namespace("http://www.JceFinalProjectOntology.com/")
    ut = Namespace("http://www.JceFinalProjectOntology.com/subject/#")
    usubClass = URIRef("http://www.JceFinalProjectOntology.com/subject/"
                       + subClass.strip() + '#')
    print(ut)
    print(usubClass)
    # Both graphs share one store so they can be built side by side.
    store = IOMemory()
    sty = LDT[predicate]
    g = rdflib.Graph(store=store, identifier=LDT)
    t = ConjunctiveGraph(store=store, identifier=ut)
    print('Triples in graph before add: ', len(t))
    g.add((URIRef(LDT), RDF.type, RDFS.Class))
    g.add((URIRef(LDT), RDFS.label, Literal("JFPO")))
    g.add((URIRef(LDT), RDFS.comment, Literal('class of all properties')))
    for v in self.symbols.values():
        # Compound terms are reduced to their head term.
        vs = self.splitTerms(v)[0] if self.if_compoTerm(v) else v
        g.add((LDT[vs], RDF.type, RDF.Property))
        g.add((LDT[vs], RDFS.label, Literal('has' + vs)))
        g.add((LDT[vs], RDFS.comment, Literal(v)))
        g.add((LDT[vs], RDFS.range, OWL.Class))
        g.add((LDT[vs], RDFS.domain, Literal(vs)))
    g.bind('JFPO', LDT)
    g.serialize('trtst.rdf', format='turtle')
    t.add((ut[tr], RDF.type, OWL.Class))
    t.add((ut[tr], RDFS.subClassOf, OWL.Thing))
    t.add((ut[tr], RDFS.label, Literal(tr)))
    t.add((ut[tr], DC.title, Literal(booktitle)))
    t.add((ut[tr], DC.source, Literal(address)))
    # Graphs have set semantics, so the triple only needs adding once
    # (the original added it three times).
    t.add((ut[tr], DC[predicate], URIRef(usubClass)))
    t.add((ut[tr], LDT[predicate], RDF.Property))
    t.add((ut[tr], LDT.term(predicate), URIRef(usubClass)))
    t.add((usubClass, RDF.type, OWL.Class))
    t.add((usubClass, RDFS.subClassOf, OWL.Thing))
    t.add((usubClass, RDFS.subClassOf, URIRef(sty)))
    t.add((usubClass, RDFS.label, Literal(subClass)))
    # Fix: the 'dc' prefix was previously bound to a malformed
    # "http://http://purl.org/..." URI (doubled scheme).
    t.bind("dc", "http://purl.org/dc/elements/1.1/")
    t.bind('JFPO', LDT)
    t.commit()
    t.serialize('test2.owl', format='turtle')
def create_ontology(self, tr, predicate, subClass, address, booktitle):
    """Build and persist the JFPO ontology for one term/book record.

    Side effects: writes two Turtle files —
    'trtst.rdf' (the property schema derived from ``self.symbols``) and
    'test2.owl' (the subject graph linking ``tr`` to its subclass, source
    address and book title).
    """
    LDT = Namespace("http://www.JceFinalProjectOntology.com/")
    ut = Namespace("http://www.JceFinalProjectOntology.com/subject/#")
    usubClass = URIRef("http://www.JceFinalProjectOntology.com/subject/"
                       + subClass.strip() + '#')
    print(ut)
    print(usubClass)
    # Both graphs share one store so they can be built side by side.
    store = IOMemory()
    sty = LDT[predicate]
    g = rdflib.Graph(store=store, identifier=LDT)
    t = ConjunctiveGraph(store=store, identifier=ut)
    print('Triples in graph before add: ', len(t))
    g.add((URIRef(LDT), RDF.type, RDFS.Class))
    g.add((URIRef(LDT), RDFS.label, Literal("JFPO")))
    g.add((URIRef(LDT), RDFS.comment, Literal('class of all properties')))
    for v in self.symbols.values():
        # Compound terms are reduced to their head term.
        vs = self.splitTerms(v)[0] if self.if_compoTerm(v) else v
        g.add((LDT[vs], RDF.type, RDF.Property))
        g.add((LDT[vs], RDFS.label, Literal('has' + vs)))
        g.add((LDT[vs], RDFS.comment, Literal(v)))
        g.add((LDT[vs], RDFS.range, OWL.Class))
        g.add((LDT[vs], RDFS.domain, Literal(vs)))
    g.bind('JFPO', LDT)
    g.serialize('trtst.rdf', format='turtle')
    t.add((ut[tr], RDF.type, OWL.Class))
    t.add((ut[tr], RDFS.subClassOf, OWL.Thing))
    t.add((ut[tr], RDFS.label, Literal(tr)))
    t.add((ut[tr], DC.title, Literal(booktitle)))
    t.add((ut[tr], DC.source, Literal(address)))
    # Graphs have set semantics, so the triple only needs adding once
    # (the original added it three times).
    t.add((ut[tr], DC[predicate], URIRef(usubClass)))
    t.add((ut[tr], LDT[predicate], RDF.Property))
    t.add((ut[tr], LDT.term(predicate), URIRef(usubClass)))
    t.add((usubClass, RDF.type, OWL.Class))
    t.add((usubClass, RDFS.subClassOf, OWL.Thing))
    t.add((usubClass, RDFS.subClassOf, URIRef(sty)))
    t.add((usubClass, RDFS.label, Literal(subClass)))
    # Fix: the 'dc' prefix was previously bound to a malformed
    # "http://http://purl.org/..." URI (doubled scheme).
    t.bind("dc", "http://purl.org/dc/elements/1.1/")
    t.bind('JFPO', LDT)
    t.commit()
    t.serialize('test2.owl', format='turtle')
def main():
    """Build the INDRA modification-type hierarchy and save it."""
    indra_ns = 'http://sorger.med.harvard.edu/indra/'
    relations = Namespace(indra_ns + 'relations/')
    entities = Namespace(indra_ns + 'entities/')
    isa = relations.term('isa')
    modification = entities.term('modification')
    g = Graph()
    # Every known modification type is a direct child of 'modification'.
    for mod in ('phosphorylation', 'ubiquitination', 'sumoylation',
                'acetylation', 'hydroxylation'):
        g.add((entities.term(mod), isa, modification))
    save_hierarchy(g, hierarchy_path)
def serialize(self, add, delete):
    """Serialize a changeset in the TopBraid diff vocabulary as TriG.

    Parameters
    ----------
    add, delete : dict
        Mapping of graph URI -> iterable of (s, p, o) triples added /
        removed in that graph.

    Returns
    -------
    str
        One urn:diff changeset graph per touched input graph, as TriG.
    """
    diff = Namespace("http://topbraid.org/diff#")
    g = ConjunctiveGraph()
    namespace_manager = NamespaceManager(g)
    namespace_manager.bind('diff', diff, override=False)
    namespace_manager.bind('owl', OWL, override=False)
    for graphUri in set(delete) | set(add):
        deleted = delete.get(graphUri)
        added = add.get(graphUri)
        if not deleted and not added:
            continue  # no changes recorded for this graph
        changeset = Namespace("urn:diff:" + str(uuid.uuid1()))
        graphTerm = changeset.term("")
        # 'http://quitdiff.default/' marks the default graph; only real
        # named graphs are owl:imported into the changeset.
        if str(graphUri) != 'http://quitdiff.default/':
            g.add((graphTerm, OWL.imports, graphUri, graphTerm))
        g.add((graphTerm, RDF.type, OWL.Ontology, graphTerm))
        g.add((graphTerm, OWL.imports, diff.term(""), graphTerm))
        # Each changed triple is reified as a blank node.  The original
        # kept dead loop counters (i = 0 / i += 1, never read) — removed.
        for triple in deleted or ():
            stmt = BNode()
            g.add((stmt, RDF.type, diff.DeletedTripleDiff, graphTerm))
            g.add((stmt, RDF.subject, triple[0], graphTerm))
            g.add((stmt, RDF.predicate, triple[1], graphTerm))
            g.add((stmt, RDF.object, triple[2], graphTerm))
        for triple in added or ():
            stmt = BNode()
            g.add((stmt, RDF.type, diff.AddedTripleDiff, graphTerm))
            g.add((stmt, RDF.subject, triple[0], graphTerm))
            g.add((stmt, RDF.predicate, triple[1], graphTerm))
            g.add((stmt, RDF.object, triple[2], graphTerm))
    return g.serialize(format="trig").decode("utf-8")
print("%s of %s processed" % (i, entities.count())) converter.convert_entity(e) def get_uri_by_object_id(objectId): g.bind("lobbyOntology", 'https://studi.f4.htw-berlin.de/~s0539710/lobbyradar/ontology#') result = g.query( "SELECT ?uri ?type WHERE { ?uri lobbyOntology:mongo_id '%s' ; <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?type }" % str(objectId)) for row in result: return {'uri': row['uri'], 'type': row['type']} positions_map = { u"Vorstand": lobbyOntology.term("executiveOf"), u"Mitglied": org.term("memberOf"), u"Ordentliches Mitglied": lobbyOntology.term("fullMemberOf"), u"Stellvertretendes Mitglied": lobbyOntology.term("deputyMemberOf"), u"Arbeitsverh\xe4ltnis": lobbyOntology.term("employeeOf"), u"Aufsichtsratsmitglied": lobbyOntology.term("supervisoryMemberOf"), u"Mitglied des Aufsichtsrates": lobbyOntology.term("supervisoryMemberOf"), u"Mitglied des Aufsichtsrats": lobbyOntology.term("supervisoryMemberOf"), u"Mitglied des Kuratoriums": lobbyOntology.term("kuratoriumMemberOf"), u"Mitglied des Stiftungsrates": lobbyOntology.term("kuratoriumMemberOf"), u"Mitglied des Beirates": lobbyOntology.term("advisoryMemberOf"), u"Mitglied des Vorstandes": lobbyOntology.term("executiveOf"), u"Vorstandsmitglied": lobbyOntology.term("executiveOf"), u"Staatssekret\xe4r": lobbyOntology.term("secretaryOf"), u'Parlamentarischer Staatssekret\xe4r': lobbyOntology.term("secretaryOf"), u"Mitglied im Rundfunkrat": lobbyOntology.term("mediaAdvisoryMemberOf"),
import sys import json from os.path import join, dirname, abspath from rdflib import Graph, Namespace, Literal from indra.sources import sofia # Note that this is just a placeholder, it doesn't resolve as a URL sofia_ns = Namespace('http://cs.cmu.edu/sofia/') indra_ns = 'http://sorger.med.harvard.edu/indra/' indra_rel_ns = Namespace(indra_ns + 'relations/') isa = indra_rel_ns.term('isa') def save_ontology(g, path): with open(path, 'wb') as out_file: g_bytes = g.serialize(format='nt') # Replace extra new lines in string and get rid of empty line at end g_bytes = g_bytes.replace(b'\n\n', b'\n').strip() # Split into rows and sort rows = g_bytes.split(b'\n') rows.sort() g_bytes = b'\n'.join(rows) out_file.write(g_bytes) def build_ontology(ont_json, rdf_path): G = Graph() for top_key, entries in ont_json.items(): for entry_key, examples in entries.items(): if '/' in entry_key:
import hashlib

if __name__ == '__main__':
    arguments = docopt(__doc__, version='SKOS2XL 1.0')
    infile = arguments['--infile']
    outfile = arguments['--outfile']
    uri = arguments['--uri']

    graph = Graph()
    xl = Namespace('http://www.w3.org/2008/05/skos-xl#')
    base = Namespace(uri)
    namespace_manager = NamespaceManager(graph)
    # do we make the assumption that no SKOS-XL is already bound?
    namespace_manager.bind('xl', xl, override=False)

    file_format = guess_format(infile)
    graph.parse(infile, format=file_format)

    # Mint one reified xl:Label per skos:prefLabel, keyed by language plus
    # the md5 of the literal so identical labels share a resource.
    for subject, literal in graph.subject_objects(predicate=SKOS.prefLabel):
        concept = Resource(graph, subject)
        digest = hashlib.md5(literal.encode('utf-8')).hexdigest()
        label = Resource(graph,
                         base.term('label/' + literal.language + '_' + digest))
        label.add(RDF.type, xl.Label)
        label.add(xl.literalForm, literal)
        concept.add(xl.prefLabel, label)

    graph.serialize(outfile, format=file_format)
def main(search=None, cache=None, identifiers=[]):
    """Convert RKD image and thesaurus records to RDF and serialize as TriG.

    Exactly one of the three inputs is consumed, in this priority order:
    ``search`` (an API search URL walked by parseURL), ``cache`` (a dict of
    pre-fetched documents parsed offline via parseData), or
    ``identifiers`` (record ids fetched one by one from APIURL).

    Side effects: reads and rewrites the 'rkdthesaurus.json' and
    'imagecache.json' cache files, and writes
    'rkdportraits14751825.trig'.

    NOTE(review): the mutable default ``identifiers=[]`` is only read,
    never mutated, so it is harmless here — prefer None if edited.
    """
    ns = Namespace("https://data.create.humanities.uva.nl/id/rkd/")
    ds = Dataset()
    # Bind all prefixes that should appear in the TriG output.
    ds.bind('rdfs', RDFS)
    ds.bind('schema', schema)
    ds.bind('sem', sem)
    ds.bind('bio', bio)
    ds.bind('foaf', foaf)
    ds.bind('void', void)
    ds.bind('skos', SKOS)
    ds.bind('owl', OWL)
    ds.bind('dc', dc)
    ds.bind('rkdArtist', URIRef("https://data.rkd.nl/artists/"))
    ds.bind('rkdThes', nsThesaurus)
    ds.bind('rkdPerson', nsPerson)
    ds.bind('rkdImage', URIRef("https://rkd.nl/explore/images/"))
    ds.bind('rkdThumb', URIRef("https://images.rkd.nl/rkd/thumb/650x650/"))
    ds.bind('aat', URIRef("http://vocab.getty.edu/aat/"))
    ## First the images
    # rdfSubject.db is the global graph rdfAlchemy writes into; point it at
    # the image graph first.
    g = rdfSubject.db = ds.graph(identifier=ns)
    # Load cache thesaurus
    if os.path.isfile('rkdthesaurus.json'):
        with open('rkdthesaurus.json') as infile:
            thesaurusDict = json.load(infile)
    else:
        thesaurusDict = dict()
    # Load cache images
    if os.path.isfile('imagecache.json'):
        with open('imagecache.json') as infile:
            imageCache = json.load(infile)
    else:
        imageCache = dict()
    # to fetch all identifiers from the search
    if search:
        thesaurusDict, imageCache = parseURL(search,
                                             thesaurusDict=thesaurusDict,
                                             imageCache=imageCache)
    elif cache:
        # assume that everything in the thesaurus is also cached
        for doc in cache.values():
            parseData(doc, thesaurusDict=thesaurusDict)
    elif identifiers:
        for i in identifiers:
            thesaurusDict, imageCache = parseURL(APIURL + str(i),
                                                 thesaurusDict=thesaurusDict,
                                                 imageCache=imageCache)
    # Any images without labels?
    # These were not included in the search, but fetch them anyway.
    print("Finding referred images that were not included")
    q = """ PREFIX schema: <http://schema.org/> SELECT ?uri WHERE { ?role a schema:Role ; schema:isRelatedTo ?uri . FILTER NOT EXISTS { ?uri schema:name ?name } } """
    images = g.query(q)
    print(f"Found {len(images)}!")
    for i in images:
        # Strip the base URI to recover the bare RKD record id.
        identifier = str(i['uri']).replace('https://rkd.nl/explore/images/', '')
        thesaurusDict, imageCache = parseURL(
            "https://api.rkd.nl/api/record/images/" + str(identifier),
            thesaurusDict=thesaurusDict,
            imageCache=imageCache)
    ## Then the thesaurus
    print("Converting the thesaurus")
    # Switch the rdfAlchemy target graph to the thesaurus subgraph.
    rdfSubject.db = ds.graph(identifier=ns.term('thesaurus/'))
    # Snapshot the keys: getThesaurus may grow thesaurusDict while we iterate.
    ids = list(thesaurusDict.keys())
    for i in ids:
        _, thesaurusDict = getThesaurus(i, thesaurusDict, 'concept')
    # Save updated cache
    with open('rkdthesaurus.json', 'w') as outfile:
        json.dump(thesaurusDict, outfile)
    with open('imagecache.json', 'w') as outfile:
        json.dump(imageCache, outfile)
    ## Serialize
    print("Serializing!")
    ds.serialize('rkdportraits14751825.trig', format='trig')
def main():
    """Build the INDRA activity hierarchy and save it."""
    indra_ns = 'http://sorger.med.harvard.edu/indra/'
    relations = Namespace(indra_ns + 'relations/')
    activities = Namespace(indra_ns + 'activities/')
    isa = relations.term('isa')
    # (child, parent) pairs, in insertion order.
    hierarchy = (
        ('transcription', 'activity'),
        ('catalytic', 'activity'),
        ('gtpbound', 'activity'),
        ('kinase', 'catalytic'),
        ('phosphatase', 'catalytic'),
        ('gef', 'catalytic'),
        ('gap', 'catalytic'),
    )
    g = Graph()
    for child, parent in hierarchy:
        g.add((activities.term(child), isa, activities.term(parent)))
    save_hierarchy(g, hierarchy_path)
saaInventory = Namespace( "https://data.goldenagents.org/datasets/montiasgpi/Inventory/") saaItem = Namespace( "https://data.goldenagents.org/datasets/montiasgpi/Inventory/Item/") tgn = Namespace("http://vocab.getty.edu/tgn/") ARCHIVE_DESCRIPTIONS = 'data/GPI/getty_dutch_archival_descriptions_utf8.csv' ARCHIVE_ITEMS = 'data/GPI/getty_dutch_archival_contents_utf8.csv' ############################################################ # Mapping to the Getty Thesaurus of Geographic Names (TGN) # ############################################################ COUNTRIES = { 'Netherlands': tgn.term('7016845'), 'Belgium': tgn.term('1000063'), 'Germany': tgn.term('7000084') } CITIES = { 'Alkmaar': tgn.term('7007057'), 'Amsterdam': tgn.term('7006952'), 'Antwerp': tgn.term('7007856'), 'Dordrecht': tgn.term('7006798'), 'Haarlem': tgn.term('7007048'), 'Hamburg': tgn.term('7005289'), 'Hoorn': tgn.term('7007056'), 'Leiden': tgn.term('7006809'), 'Hague, The': tgn.term('7006810'), 'Utrecht': tgn.term('7006926'),
) div_items_list = pq(d).find('.feed-list').find('li').find('div').filter( '.item') for item in div_items_list: dates = pq(item).find('div').filter('.left-col.calendar').find('span') month = pq(dates).filter('.month').text() year = pq(dates).filter('.year').text() day = pq(dates).filter('.day').text() time = pq( pq(item).find('div').filter('.middle-col.desc.hidden-xs').find( 'span').filter('.meta')[1]).text().replace(' Time: ', '') datestr = '{}-{}-{} {}'.format(year, month, day, time) description = pq(item).find('div').find('span').filter('.desc').text() link = pq(item).find('div').filter('.middle-col.desc.hidden-xs').find( 'a').attr('href') title = pq(item).find('div').filter('.middle-col.desc.hidden-xs').find( 'a').filter('.title').text() type = pq( pq(item).find('div').filter('.middle-col.desc.hidden-xs').find( 'span').filter('.meta').outerHtml()).text() event = URIRef(link) dt = datetime.strptime(datestr.lower(), '%Y-%b-%d %H:%M') g.add((event, FOAF.title, Literal(title))) g.add((event, n.term('description'), Literal(description))) g.add((event, n.term('date'), Literal(dt, datatype=XSD.date))) g.add((event, n.term('type'), Literal(type))) print(g.serialize(format='application/rdf+xml'))
entity = entities.find_one({'name': 'Rudolf Henke'}) all_entities = entities.find() for i,e in enumerate(all_entities): if (i % (entities.count() / 20) == 0): print("%s of %s processed" % (i, entities.count())) converter.convert_entity(e) def get_uri_by_object_id(objectId): g.bind("lobbyOntology", 'https://studi.f4.htw-berlin.de/~s0539710/lobbyradar/ontology#') result = g.query("SELECT ?uri ?type WHERE { ?uri lobbyOntology:mongo_id '%s' ; <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?type }" % str(objectId)) for row in result: return { 'uri': row['uri'], 'type': row['type'] } positions_map = { u"Vorstand": lobbyOntology.term("executiveOf"), u"Mitglied": org.term("memberOf"), u"Ordentliches Mitglied": lobbyOntology.term("fullMemberOf"), u"Stellvertretendes Mitglied": lobbyOntology.term("deputyMemberOf"), u"Arbeitsverh\xe4ltnis": lobbyOntology.term("employeeOf"), u"Aufsichtsratsmitglied": lobbyOntology.term("supervisoryMemberOf"), u"Mitglied des Aufsichtsrates": lobbyOntology.term("supervisoryMemberOf"), u"Mitglied des Aufsichtsrats": lobbyOntology.term("supervisoryMemberOf"), u"Mitglied des Kuratoriums": lobbyOntology.term("kuratoriumMemberOf"), u"Mitglied des Stiftungsrates": lobbyOntology.term("kuratoriumMemberOf"), u"Mitglied des Beirates": lobbyOntology.term("advisoryMemberOf"), u"Mitglied des Vorstandes": lobbyOntology.term("executiveOf"), u"Vorstandsmitglied": lobbyOntology.term("executiveOf"), u"Staatssekret\xe4r": lobbyOntology.term("secretaryOf"), u'Parlamentarischer Staatssekret\xe4r': lobbyOntology.term("secretaryOf"), u"Mitglied im Rundfunkrat": lobbyOntology.term("mediaAdvisoryMemberOf"),
import sys from rdflib import Graph, Namespace, Literal import csv import urllib2 if __name__ == '__main__': indra_ns = 'http://sorger.med.harvard.edu/indra/' if len(sys.argv) > 1: proteins_file = sys.argv[1] else: proteins_file = '../../data/ras_pathway_proteins.csv' rn = Namespace(indra_ns + 'relations/') en = Namespace(indra_ns + 'entities/') g = Graph() has_name = rn.term('hasName') has_long_name = rn.term('hasLongName') has_synonym = rn.term('hasSynonym') isa = rn.term('isa') # Read BEL family names res = urllib2.urlopen('http://resource.belframework.org/belframework/'+\ 'latest-release/namespace/selventa-protein-families.belns') belns_text = res.read() start = belns_text.find('[Values]') lines = belns_text[start:].split('\n') bel_family_names = [] for l in lines: if l.endswith(' Family|P'): family_name = l[:-2].replace(' ', '_').replace('/', '_') bel_family_names.append(family_name)
predicate_mapping, category_mapping, property_mapping, ] for mapping in mappings: for key, value in mapping.items(): if iri.lower() == key.lower(): return value return contract(iri) OBO = Namespace('http://purl.obolibrary.org/obo/') top_level_terms = { OBO.term('CL_0000000'): 'cell', OBO.term('UBERON_0001062'): 'anatomical_entity', OBO.term('PATO_0000001'): 'quality', OBO.term('NCBITaxon_131567'): 'organism', OBO.term('CLO_0000031'): 'cell_line', OBO.term('MONDO_0000001'): 'disease', OBO.term('CHEBI_23367'): 'molecular_entity', OBO.term('CHEBI_23888'): 'drug', OBO.term('UPHENO_0001001'): 'phenotypic_feature', OBO.term('GO_0008150'): 'biological_process', OBO.term('GO_0009987'): 'cellular_process', OBO.term('GO_0005575'): 'cellular_component', OBO.term('GO_0003674'): 'molecular_function', OBO.term('SO_0000704'): 'gene', OBO.term('GENO_0000002'): 'variant_locus', OBO.term('GENO_0000536'): 'genotype',
# print("Dominios", dir(property_i)) # print("Rangos", property_i.ranges) list_all_class[domain.uri].add_property(property_object) contextos = [] list_all_class_string = [] from rdflib import URIRef, BNode, Literal, Graph, RDF, Namespace FOAF.knows g = Graph() rr = Namespace('http://www.w3.org/ns/r2rml#') g.namespace_manager.bind("rr", rr) for clase_i in list_all_class.items(): # contextos.append({"name": clase_i[1].get_formated_name(), "uri": clase_i[1].uri}) class_current = URIRef("#{}".format(clase_i[1].get_formated_name())) g.add((class_current, RDF.type, rr.term('TriplesMap'))) g.add((class_current, Literal('rr:logicalTable'), Literal(clase_i[1].get_formated_name()))) g.add((class_current, Literal('rr:subjectMap'), Literal(clase_i[1].get_formated_name()))) subMap = BNode() g.add((class_current, Literal('rr:subjectMap'), subMap)) g.add((subMap, rr.template, Literal(clase_i[1].uri.replace("#", "/") + "/{ID}"))) g.add((subMap, rr.termType, rr.IRI)) g.add((subMap, rr.term('class'), Literal("<" + clase_i[1].uri + ">"))) for property_i in clase_i[1].properties: # contextos.append({"name": property_i.get_formated_name(), "uri": property_i.uri})
class RdfSink(Sink): """ RdfSink is responsible for writing data as records to an RDF serialization. .. note:: Currently only RDF N-Triples serialization is supported. Parameters ---------- owner: Transformer Transformer to which the GraphSink belongs filename: str The filename to write to format: str The file format (``nt``) compression: str The compression type (``gz``) reify_all_edges: bool Whether or not to reify all the edges kwargs: Any Any additional arguments """ def __init__( self, owner, filename: str, format: str = "nt", compression: Optional[bool] = None, reify_all_edges: bool = False, **kwargs: Any, ): super().__init__(owner) if format not in {"nt"}: raise ValueError( f"Only RDF N-Triples ('nt') serialization supported.") self.DEFAULT = Namespace(self.prefix_manager.prefix_map[""]) # self.OBO = Namespace('http://purl.obolibrary.org/obo/') self.OBAN = Namespace(self.prefix_manager.prefix_map["OBAN"]) self.PMID = Namespace(self.prefix_manager.prefix_map["PMID"]) self.BIOLINK = Namespace(self.prefix_manager.prefix_map["biolink"]) self.toolkit = get_toolkit() self.reverse_predicate_mapping = {} self.property_types = get_biolink_property_types() self.cache = {} self.reify_all_edges = reify_all_edges self.reification_types = { RDF.Statement, self.BIOLINK.Association, self.OBAN.association, } if compression == "gz": f = gzip.open(filename, "wb") else: f = open(filename, "wb") self.FH = f self.encoding = "ascii" def set_reverse_predicate_mapping(self, m: Dict) -> None: """ Set reverse predicate mappings. Use this method to update mappings for predicates that are not in Biolink Model. Parameters ---------- m: Dict A dictionary where the keys are property names and values are their corresponding IRI. """ for k, v in m.items(): self.reverse_predicate_mapping[v] = URIRef(k) def set_property_types(self, m: Dict) -> None: """ Set export type for properties that are not in Biolink Model. 
Parameters ---------- m: Dict A dictionary where the keys are property names and values are their corresponding types. """ for k, v in m.items(): (element_uri, canonical_uri, predicate, property_name) = process_predicate(self.prefix_manager, k) if element_uri: key = element_uri elif predicate: key = predicate else: key = property_name self.property_types[key] = v def write_node(self, record: Dict) -> None: """ Write a node record as triples. Parameters ---------- record: Dict A node record """ for k, v in record.items(): if k in {"id", "iri"}: continue ( element_uri, canonical_uri, predicate, property_name, ) = self.process_predicate(k) if element_uri is None: # not a biolink predicate if k in self.reverse_predicate_mapping: prop_uri = self.reverse_predicate_mapping[k] # prop_uri = self.prefix_manager.contract(prop_uri) else: prop_uri = k else: prop_uri = canonical_uri if canonical_uri else element_uri prop_type = self._get_property_type(prop_uri) log.debug( f"prop {k} has prop_uri {prop_uri} and prop_type {prop_type}") prop_uri = self.uriref(prop_uri) if isinstance(v, (list, set, tuple)): for x in v: value_uri = self._prepare_object(k, prop_type, x) self._write_triple(self.uriref(record["id"]), prop_uri, value_uri) else: value_uri = self._prepare_object(k, prop_type, v) self._write_triple(self.uriref(record["id"]), prop_uri, value_uri) def _write_triple(self, s: URIRef, p: URIRef, o: Union[URIRef, Literal]) -> None: """ Serialize a triple. Parameters ---------- s: rdflib.URIRef The subject p: rdflib.URIRef The predicate o: Union[rdflib.URIRef, rdflib.Literal] The object """ self.FH.write( _nt_row((s, p, o)).encode(self.encoding, "_rdflib_nt_escape")) def write_edge(self, record: Dict) -> None: """ Write an edge record as triples. 
Parameters ---------- record: Dict An edge record """ ecache = [] associations = set( [self.prefix_manager.contract(x) for x in self.reification_types]) associations.update([ str(x) for x in set(self.toolkit.get_all_associations(formatted=True)) ]) if self.reify_all_edges: reified_node = self.reify(record["subject"], record["object"], record) s = reified_node["subject"] p = reified_node["predicate"] o = reified_node["object"] ecache.append((s, p, o)) n = reified_node["id"] for prop, value in reified_node.items(): if prop in {"id", "association_id", "edge_key"}: continue ( element_uri, canonical_uri, predicate, property_name, ) = self.process_predicate(prop) if element_uri: prop_uri = canonical_uri if canonical_uri else element_uri else: if prop in self.reverse_predicate_mapping: prop_uri = self.reverse_predicate_mapping[prop] # prop_uri = self.prefix_manager.contract(prop_uri) else: prop_uri = predicate prop_type = self._get_property_type(prop) log.debug( f"prop {prop} has prop_uri {prop_uri} and prop_type {prop_type}" ) prop_uri = self.uriref(prop_uri) if isinstance(value, list): for x in value: value_uri = self._prepare_object(prop, prop_type, x) self._write_triple(URIRef(n), prop_uri, value_uri) else: value_uri = self._prepare_object(prop, prop_type, value) self._write_triple(URIRef(n), prop_uri, value_uri) else: if (("type" in record and record["type"] in associations) or ("association_type" in record and record["association_type"] in associations) or ("category" in record and any(record["category"]) in associations)): reified_node = self.reify(record["subject"], record["object"], record) s = reified_node["subject"] p = reified_node["predicate"] o = reified_node["object"] ecache.append((s, p, o)) n = reified_node["id"] for prop, value in reified_node.items(): if prop in {"id", "association_id", "edge_key"}: continue ( element_uri, canonical_uri, predicate, property_name, ) = self.process_predicate(prop) if element_uri: prop_uri = canonical_uri if canonical_uri 
else element_uri else: if prop in self.reverse_predicate_mapping: prop_uri = self.reverse_predicate_mapping[prop] # prop_uri = self.prefix_manager.contract(prop_uri) else: prop_uri = predicate prop_type = self._get_property_type(prop) prop_uri = self.uriref(prop_uri) if isinstance(value, list): for x in value: value_uri = self._prepare_object( prop, prop_type, x) self._write_triple(URIRef(n), prop_uri, value_uri) else: value_uri = self._prepare_object( prop, prop_type, value) self._write_triple(URIRef(n), prop_uri, value_uri) else: s = self.uriref(record["subject"]) p = self.uriref(record["predicate"]) o = self.uriref(record["object"]) self._write_triple(s, p, o) for t in ecache: self._write_triple(t[0], t[1], t[2]) def uriref(self, identifier: str) -> URIRef: """ Generate a rdflib.URIRef for a given string. Parameters ---------- identifier: str Identifier as string. Returns ------- rdflib.URIRef URIRef form of the input ``identifier`` """ if identifier.startswith("urn:uuid:"): uri = identifier elif identifier in reverse_property_mapping: # identifier is a property uri = reverse_property_mapping[identifier] else: # identifier is an entity fixed_identifier = identifier if fixed_identifier.startswith(":"): # TODO: this should be handled upstream by prefixcommons-py fixed_identifier = fixed_identifier.replace(":", "", 1) if " " in identifier: fixed_identifier = fixed_identifier.replace(" ", "_") if self.prefix_manager.is_curie(fixed_identifier): uri = self.prefix_manager.expand(fixed_identifier) if fixed_identifier == uri: uri = self.DEFAULT.term(fixed_identifier) elif self.prefix_manager.is_iri(fixed_identifier): uri = fixed_identifier else: uri = self.DEFAULT.term(fixed_identifier) # if identifier == uri: # if PrefixManager.is_curie(identifier): # identifier = identifier.replace(':', '_') return URIRef(uri) def _prepare_object(self, prop: str, prop_type: str, value: Any) -> rdflib.term.Identifier: """ Prepare the object of a triple. 
Parameters ---------- prop: str property name prop_type: str property type value: Any property value Returns ------- rdflib.term.Identifier An instance of rdflib.term.Identifier """ if prop_type == "uriorcurie" or prop_type == "xsd:anyURI": if isinstance(value, str) and PrefixManager.is_curie(value): o = self.uriref(value) elif isinstance(value, str) and PrefixManager.is_iri(value): if _is_valid_uri(value): o = URIRef(value) else: o = Literal(value) else: o = Literal(value) elif prop_type.startswith("xsd"): o = Literal(value, datatype=self.prefix_manager.expand(prop_type)) else: o = Literal(value, datatype=self.prefix_manager.expand("xsd:string")) return o def _get_property_type(self, p: str) -> str: """ Get type for a given property name. Parameters ---------- p: str property name Returns ------- str The type for property name """ # TODO: this should be properly defined in the model default_uri_types = { "biolink:type", "biolink:category", "biolink:subject", "biolink:object", "biolink:relation", "biolink:predicate", "rdf:type", "rdf:subject", "rdf:predicate", "rdf:object", } if p in default_uri_types: t = "uriorcurie" else: if p in self.property_types: t = self.property_types[p] elif f":{p}" in self.property_types: t = self.property_types[f":{p}"] elif f"biolink:{p}" in self.property_types: t = self.property_types[f"biolink:{p}"] else: t = "xsd:string" # if value: # if isinstance(value, (list, set, tuple)): # x = value[0] # if self.graph.has_node(x): # t = 'uriorcurie' # else: # t = 'xsd:string' # else: # if self.graph.has_node(value): # t = 'uriorcurie' # else: # t = 'xsd:string' return t def process_predicate(self, p: Optional[Union[URIRef, str]]) -> Tuple: """ Process a predicate where the method checks if there is a mapping in Biolink Model. 
Parameters ---------- p: Optional[Union[URIRef, str]] The predicate Returns ------- Tuple A tuple that contains the Biolink CURIE (if available), the Biolink slot_uri CURIE (if available), the CURIE form of p, the reference of p """ if p in self.cache: # already processed this predicate before; pull from cache element_uri = self.cache[p]["element_uri"] canonical_uri = self.cache[p]["canonical_uri"] predicate = self.cache[p]["predicate"] property_name = self.cache[p]["property_name"] else: # haven't seen this property before; map to element if self.prefix_manager.is_iri(p): predicate = self.prefix_manager.contract(str(p)) else: predicate = None if self.prefix_manager.is_curie(p): property_name = self.prefix_manager.get_reference(p) predicate = p else: if predicate and self.prefix_manager.is_curie(predicate): property_name = self.prefix_manager.get_reference( predicate) else: property_name = p predicate = f":{p}" element = self.get_biolink_element(p) canonical_uri = None if element: if isinstance(element, SlotDefinition): # predicate corresponds to a biolink slot if element.definition_uri: element_uri = self.prefix_manager.contract( element.definition_uri) else: element_uri = ( f"biolink:{sentencecase_to_snakecase(element.name)}" ) if element.slot_uri: canonical_uri = element.slot_uri elif isinstance(element, ClassDefinition): # this will happen only when the IRI is actually # a reference to a class element_uri = self.prefix_manager.contract( element.class_uri) else: element_uri = f"biolink:{sentencecase_to_camelcase(element.name)}" if "biolink:Attribute" in get_biolink_ancestors(element.name): element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}" if not predicate: predicate = element_uri else: # no mapping to biolink model; # look at predicate mappings element_uri = None if p in self.reverse_predicate_mapping: property_name = self.reverse_predicate_mapping[p] predicate = f":{property_name}" self.cache[p] = { "element_uri": element_uri, "canonical_uri": 
canonical_uri, "predicate": predicate, "property_name": property_name, } return element_uri, canonical_uri, predicate, property_name def get_biolink_element(self, predicate: Any) -> Optional[Element]: """ Returns a Biolink Model element for a given predicate. Parameters ---------- predicate: Any The CURIE of a predicate Returns ------- Optional[Element] The corresponding Biolink Model element """ toolkit = get_toolkit() if self.prefix_manager.is_iri(predicate): predicate_curie = self.prefix_manager.contract(predicate) else: predicate_curie = predicate if self.prefix_manager.is_curie(predicate_curie): reference = self.prefix_manager.get_reference(predicate_curie) else: reference = predicate_curie element = toolkit.get_element(reference) if not element: try: mapping = toolkit.get_element_by_mapping(predicate) if mapping: element = toolkit.get_element(mapping) except ValueError as e: self.owner.log_error( entity=str(predicate), error_type=ErrorType.INVALID_EDGE_PREDICATE, message=str(e)) element = None return element def reify(self, u: str, v: str, data: Dict) -> Dict: """ Create a node representation of an edge. Parameters ---------- u: str Subject v: str Object k: str Edge key data: Dict Edge data Returns ------- Dict The reified node """ s = self.uriref(u) p = self.uriref(data["predicate"]) o = self.uriref(v) if "id" in data: node_id = self.uriref(data["id"]) else: # generate a UUID for the reified node node_id = self.uriref(generate_uuid()) reified_node = data.copy() if "category" in reified_node: del reified_node["category"] reified_node["id"] = node_id reified_node["type"] = "biolink:Association" reified_node["subject"] = s reified_node["predicate"] = p reified_node["object"] = o return reified_node def finalize(self) -> None: """ Perform any operations after writing the file. """ self.FH.close()