def normalize_prefixes(graph, curies):
    """Normalize the namespace bindings on *graph*.

    Wraps *graph* in a makeGraph with the standard owl/skos/oboInOwl
    prefixes, drops the empty ('') namespace binding, and builds a second
    makeGraph (``ng_``) holding the same triples with the namespaces from
    *curies* bound instead.

    :param graph: an rdflib.Graph to normalize in place
    :param curies: mapping of curie prefix -> namespace uri
    :return: (mg, ng_) pair of makeGraph wrappers
    """
    mg = makeGraph('nifall', makePrefixes('owl', 'skos', 'oboInOwl'), graph=graph)
    mg.del_namespace('')
    # only referenced by the commented-out cleanup below; kept for that reason
    old_namespaces = list(graph.namespaces())
    ng_ = makeGraph('', prefixes=makePrefixes('oboInOwl', 'skos'))
    # plain loops instead of side-effect list comprehensions
    for t in mg.g:
        ng_.g.add(t)
    for n, p in curies.items():
        if n != '':
            ng_.add_namespace(n, p)
    #[mg.add_namespace(n, p) for n, p in old_namespaces if n.startswith('ns') or n.startswith('default')]
    #[mg.del_namespace(n) for n in list(mg.namespaces)]
    #graph.namespace_manager.reset()
    #[mg.add_namespace(n, p) for n, p in wat.items() if n != '']
    return mg, ng_
def ilx_json_to_tripples(j):  # this will be much easier if everything can be exported as a relationship or an annotation
    """Serialize a single InterLex JSON record *j* as nifttl.

    Maps the record's type to an owl class/property, then emits label,
    definition, synonym, superclass, and existing-id triples.

    :param j: dict with keys 'ilx', 'type', 'label', 'definition',
              'synonyms', 'superclasses', 'existing_ids'
    :return: the nifttl serialization of the resulting graph
    :raises KeyError: if j['type'] is not term/relationship/annotation
    """
    g = makeGraph('do not write me',
                  prefixes=makePrefixes('ILX', 'ilx', 'owl', 'skos', 'NIFRID'))

    def pref(inp):
        # expand a bare ilx fragment into a full ilx: uri
        return makePrefixes('ilx')['ilx'] + inp

    id_ = pref(j['ilx'])
    type_ = {
        'term': 'owl:Class',
        'relationship': 'owl:ObjectProperty',
        'annotation': 'owl:AnnotationProperty',
    }[j['type']]
    out = []  # TODO need to expand these
    out.append((id_, rdflib.RDF.type, type_))
    out.append((id_, rdflib.RDFS.label, j['label']))
    out.append((id_, 'skos:definition', j['definition']))
    for syndict in j['synonyms']:
        out.append((id_, 'NIFRID:synonym', syndict['literal']))
    for superdict in j['superclasses']:  # should we be returning the preferred id here not the ilx? or maybe that is a different json output?
        out.append((id_, rdflib.RDFS.subClassOf, pref(superdict['ilx'])))
    for eid in j['existing_ids']:
        out.append((id_, 'ilx:someOtherId', eid['iri']))  # predicate TODO
    # plain loop instead of a side-effect list comprehension
    for o in out:
        g.add_trip(*o)
    return g.g.serialize(format='nifttl')  # other formats can be chosen
def load(file):
    """Parse an ontology file and generate its matching "-dead" ontology.

    Parses *file* (turtle when the extension is .ttl, otherwise rdflib
    guesses), derives a name and version, builds the deprecated-classes
    ontology via createOntology, and hands both graphs to extract().

    :param file: path to the ontology file (~ is expanded)
    :raises rdflib.plugins.parsers.notation3.BadSyntax: if parsing fails
    """
    filepath = os.path.expanduser(file)
    _, ext = os.path.splitext(filepath)
    filetype = ext.strip('.')
    # let rdflib guess the format for anything that is not turtle
    infmt = 'turtle' if filetype == 'ttl' else None
    print(filepath)
    graph = rdflib.Graph()
    try:
        graph.parse(filepath, format=infmt)
    except rdflib.plugins.parsers.notation3.BadSyntax:
        print('PARSING FAILED', filepath)
        raise  # bare raise preserves the original traceback
    og = makeGraph('', graph=graph)

    # FIXME this should really just be a function :/
    curie, *prefs = kludge(filepath)

    name = os.path.splitext(os.path.basename(filepath))[0]
    if 'slim' in name:
        name = name.replace('slim', '')
    try:
        version = list(graph.subject_objects(owl.versionIRI))[0][1]
    except IndexError:
        # no versionIRI; fall back to the ontology iri itself
        version = list(graph.subjects(rdf.type, owl.Ontology))[0]

    ng = createOntology(
        f'{name}-dead',
        f'NIF {curie} deprecated',
        makePrefixes('replacedBy', 'NIFRID', curie, *prefs),
        f'{name}dead',
        f'Classes from {curie} with owl:deprecated true that we want rdfs:subClassOf NIFRID:birnlexRetiredClass, or classes hiding in a oboInOwl:hasAlternativeId annotation. This file was generated by pyontutils/necromancy from {version}.')
    extract(og, ng, curie)
def loadGraphFromFile(filename, prefixes=None):
    """Parse *filename* as turtle and return it wrapped in a makeGraph.

    The makeGraph name is the filename without its extension; writeloc is
    left empty.
    """
    parsed = rdflib.Graph()
    parsed.parse(filename, format='turtle')
    stem = os.path.splitext(filename)[0]
    print(stem)
    return makeGraph(stem, prefixes=prefixes, graph=parsed, writeloc='')
def do_file(filename, swap, *args):
    """Rewrite URIs in *filename* in place using switchURIs.

    Parses the file as turtle, applies *swap* via switchURIs, writes the
    modified graph back to the same path, and returns the replacement list.
    """
    print('START', filename)
    parsed = rdflib.Graph()
    parsed.parse(filename, format='turtle')
    replacements = switchURIs(parsed, swap, *args)
    writer = makeGraph('', graph=parsed)
    writer.filename = filename
    writer.write()
    print('END', filename)
    return replacements
def import_tree(graph):
    """Build the owl:imports tree for *graph* rooted at NIFTTL:nif.ttl.

    Returns the (tree, extras) pair produced by creatTree.
    """
    helper = makeGraph('', graph=graph)
    helper.add_known_namespaces('owl', 'obo', 'dc', 'dcterms', 'dctypes',
                                'skos', 'NIFTTL')
    scigraph_json = helper.make_scigraph_json('owl:imports', direct=True)
    query = Query('NIFTTL:nif.ttl', 'owl:imports', 'OUTGOING', 30)
    tree, extras = creatTree(*query, json=scigraph_json,
                             prefixes=helper.namespaces)
    #print(tree)
    return tree, extras
def graph_todo(graph, curie_prefixes, get_values):
    """Interactive audit of URIs in *graph* not covered by known curie prefixes.

    Collects every URIRef in the graph, guesses a namespace prefix for each,
    partitions them into known vs unknown against *curie_prefixes*, computes
    the non-normalized ontology.neuinfo.org identifiers, then drops into an
    IPython shell (embed) for manual inspection.  Nothing is returned.
    """
    ug = makeGraph('big-graph', graph=graph)
    ug.add_known_namespaces('NIFRID')
    fragment_prefixes, ureps = get_values(ug)
    #all_uris = sorted(set(_ for t in graph for _ in t if type(_) == rdflib.URIRef))  # this snags a bunch of other URIs
    #all_uris = sorted(set(_ for _ in graph.subjects() if type(_) != rdflib.BNode))
    #all_uris = set(spo for t in graph.subject_predicates() for spo in t if isinstance(spo, rdflib.URIRef))
    all_uris = set(spo for t in graph for spo in t if isinstance(spo, rdflib.URIRef))
    # guess the namespace of each uri: prefer '#', then '_', then '/' as the separator
    prefs = set(_.rsplit('#', 1)[0] + '#' if '#' in _
                else (_.rsplit('_', 1)[0] + '_' if '_' in _
                      else _.rsplit('/', 1)[0] + '/')
                for _ in all_uris)
    nots = set(_ for _ in prefs if _ not in curie_prefixes)  # TODO
    sos = set(prefs) - set(nots)  # NOTE(review): computed but unused below
    # substitute any uris that have known replacements
    all_uris = [u if u not in ureps else ureps[u] for u in all_uris]
    #to_rep = set(_.rsplit('#', 1)[-1].split('_', 1)[0] for _ in all_uris if 'ontology.neuinfo.org' in _)
    #to_rep = set(_.rsplit('#', 1)[-1] for _ in all_uris if 'ontology.neuinfo.org' in _)

    ignore = (
        # deprecated and only in as annotations
        'NIFGA:birnAnatomy_011',
        'NIFGA:birnAnatomy_249',
        'NIFORG:birnOrganismTaxon_19',
        'NIFORG:birnOrganismTaxon_20',
        'NIFORG:birnOrganismTaxon_21',
        'NIFORG:birnOrganismTaxon_390',
        'NIFORG:birnOrganismTaxon_391',
        'NIFORG:birnOrganismTaxon_56',
        'NIFORG:birnOrganismTaxon_68',
        'NIFINV:birnlexInvestigation_174',
        'NIFINV:birnlexInvestigation_199',
        'NIFINV:birnlexInvestigation_202',
        'NIFINV:birnlexInvestigation_204',
    )
    ignore = tuple(ug.expand(i) for i in ignore)
    # neuinfo uris that match none of the known fragment prefixes and are not
    # imports (.ttl/.owl) or explicitly ignored
    non_normal_identifiers = sorted(u for u in all_uris
                                    if 'ontology.neuinfo.org' in u
                                    and noneMembers(u, *fragment_prefixes)
                                    and not u.endswith('.ttl')
                                    and not u.endswith('.owl')
                                    and u not in ignore)
    print(len(prefs))
    embed()
def switchURIs(g, swap, *args):
    """Apply *swap* to every triple of *g*, replacing changed triples in place.

    *swap* is called per triple and yields (node, rep, pref) entries; any
    triple that changed is swapped into the graph, non-None reps are
    collected, and each newly seen pref gets its namespace registered on the
    graph.  Returns the list of collected reps.
    """
    _, fragment_prefixes = args
    collected_reps = []
    seen_prefs = {None}
    addpg = makeGraph('', graph=g)
    for triple in g:
        new_triple, triple_reps, triple_prefs = tuple(zip(*swap(triple, *args)))
        if triple != new_triple:
            g.remove(triple)
            g.add(new_triple)
        collected_reps.extend(r for r in triple_reps if r is not None)
        for pref in triple_prefs:
            if pref not in seen_prefs:
                seen_prefs.add(pref)
                addpg.add_known_namespaces(fragment_prefixes[pref])
    return collected_reps
def convert(f):
    """Re-serialize ontology file *f* with a normalized set of curie prefixes.

    Skips files in the module-level ``exclude`` list.  Determines which of
    the known prefixes actually occur in the graph, adds hand-curated extras
    for a few specific files, preserves local namespaces for parcellation
    files, then writes the graph under ~/git/NIF-Ontology/ttl/.

    :param f: path of the turtle file to convert
    :return: *f* (so callers can map over filenames)
    """
    if f in exclude:
        print('skipping', f)
        return f
    ps = {
        'PROTEGE': 'http://protege.stanford.edu/plugins/owl/protege#',
    }
    PREFIXES.update(ps)  # NOTE: mutates the module-level PREFIXES dict
    pi = {v: k for k, v in PREFIXES.items()}
    pi.pop(None)  # presumably a prefix mapped to None upstream -- confirm
    graph = rdflib.Graph()
    graph.parse(f, format='turtle')
    prefs = []
    # hand-curated extra prefixes for specific files
    if f == 'NIF-Dysfunction.ttl':
        prefs.append('OBO')
    elif f == 'NIF-Eagle-I-Bridge.ttl':
        prefs.append('IAO')
    elif f == 'resources.ttl':
        prefs.append('IAO')
    elif f == 'NIF-Investigation.ttl':
        prefs.append('IAO')
    asdf = {v: k for k, v in ps.items()}
    asdf.update(pi)

    # determine which prefixes we need
    for rn, rp in asdf.items():
        for uri in (list(graph.subjects()) + list(graph.predicates())
                    + list(graph.objects())):
            if isinstance(uri, rdflib.BNode):
                continue
            elif uri.startswith(rn):
                # OBO/IAO/NIFTTL only come from the hand-curated list above,
                # except the IAO_0000412 special case
                if rp == 'OBO' or rp == 'IAO' or rp == 'NIFTTL':
                    if rp == 'IAO' and 'IAO_0000412' in uri:  # for sequence_slim
                        pass
                    else:
                        continue
                prefs.append(rp)
                break

    if prefs:
        ps = makePrefixes(*prefs)
    else:
        ps = makePrefixes('rdfs')

    if 'parcellation/' in f:
        # keep the file's own local namespaces for parcellation sources
        nsl = {p: n for p, n in graph.namespaces()}
        if '' in nsl:
            ps[''] = nsl['']
        elif 'hbaslim' in f:
            ps['HBA'] = nsl['HBA']
        elif 'mbaslim' in f:
            ps['MBA'] = nsl['MBA']
        elif 'cocomac' in f:
            ps['cocomac'] = nsl['cocomac']

    ng = makeGraph(os.path.splitext(f)[0], prefixes=ps,
                   writeloc=os.path.expanduser('~/git/NIF-Ontology/ttl/'))
    # plain loop instead of a side-effect list comprehension
    for t in graph.triples([None] * 3):
        ng.add_node(*t)
    #print(f, len(ng.g))
    ng.write()
    return f
def convert(f, prefixes):
    """Re-serialize ontology file *f* in place using curies from *prefixes*.

    Skips files in the module-level ``exclude`` list.  For every non-import
    URI in the graph, picks the longest matching namespace from *prefixes*
    (avoiding matches that would leave another separator in the suffix),
    preserves local namespaces for parcellation files, drops a few prefixes
    for known special-case files, then writes the graph back to *f*.

    :param f: path of the turtle file to convert
    :param prefixes: mapping of curie prefix -> namespace uri
    :return: *f* (so callers can map over filenames)
    """
    if f in exclude:
        print('skipping', f)
        return f
    pi = {v: k for k, v in prefixes.items()}
    graph = rdflib.Graph()
    graph.parse(f, format='turtle')
    prefs = ['']
    asdf = {}  #{v:k for k, v in ps.items()}
    asdf.update(pi)

    # determine which prefixes we need
    for uri in (list(graph.subjects()) + list(graph.predicates())
                + list(graph.objects())):
        if uri.endswith('.owl') or uri.endswith('.ttl'):
            continue  # don't prefix imports
        # longest namespace first so the most specific prefix wins
        for rn, rp in sorted(asdf.items(), key=lambda a: -len(a[0])):
            lrn = len(rn)
            if isinstance(uri, rdflib.BNode):
                continue
            elif (uri.startswith(rn)
                  and '#' not in uri[lrn:]
                  and '/' not in uri[lrn:]):  # prevent prefixing when there is another sep
                prefs.append(rp)
                break

    if prefs:  # always truthy since prefs starts as [''], kept for symmetry
        ps = makePrefixes(*prefs)
    else:
        ps = makePrefixes('rdfs')

    if 'parcellation/' in f:
        # keep the file's own local namespaces for parcellation sources
        nsl = {p: n for p, n in graph.namespaces()}
        if '' in nsl:
            ps[''] = nsl['']
        elif 'hbaslim' in f:
            ps['HBA'] = nsl['HBA']
        elif 'mbaslim' in f:
            ps['MBA'] = nsl['MBA']
        elif 'cocomac' in f:
            ps['cocomac'] = nsl['cocomac']

    # special cases for NIFORG, NIFINV, NIFRET where there identifiers in
    # annotation properties that share the prefix, so we warn incase
    # at some point in the future for some reason want them again...
    if f == 'NIF-Organism.ttl':
        print('WARNING: special case for NIFORG')
        ps.pop('NIFORG')
    elif f == 'NIF-Investigation.ttl':
        print('WARNING: special case for NIFINV')
        ps.pop('NIFINV')
    elif f == 'unused/NIF-Retired.ttl':
        print('WARNING: special case for NIFRET')
        ps.pop('NIFGA')

    ng = makeGraph('', prefixes=ps)
    ng.filename = f
    # plain loop instead of a side-effect list comprehension
    for t in graph:
        ng.g.add(t)
    #[ng.add_trip(*n) for n in graph.triples([None]*3)]
    #print(f, len(ng.g))
    ng.write()
    return f
fmt = 'turtle' if ext == '.ttl' else 'xml' if noneMembers(o, 'go.owl', 'uberon.owl', 'pr.owl', 'doid.owl', 'taxslim.owl') or dobig: graph.parse(o, format=fmt) for i in range(4): repeat(True) with open( os.path.expanduser( '~/git/NIF-Ontology/scigraph/nifstd_curie_map.yaml'), 'rt') as f: wat = yaml.load(f) vals = set(wat.values()) mg = makeGraph('nifall', makePrefixes('owl', 'skos', 'oboInOwl'), graph=graph) mg.del_namespace('') old_namespaces = list(graph.namespaces()) ng_ = makeGraph('', prefixes=makePrefixes('oboInOwl', 'skos'), graph=rdflib.Graph()) [ng_.g.add(t) for t in mg.g] [ng_.add_namespace(n, p) for n, p in wat.items() if n != ''] #[mg.add_namespace(n, p) for n, p in old_namespaces if n.startswith('ns') or n.startswith('default')] #[mg.del_namespace(n) for n in list(mg.namespaces)] #graph.namespace_manager.reset() #[mg.add_namespace(n, p) for n, p in wat.items() if n != ''] def for_burak(ng):
from IPython import embed source = 'https://raw.githubusercontent.com/BlueBrain/nat/master/nat/modelingDictionary.csv' delimiter = ';' resp = requests.get(source) rows = [ r for r in csv.reader(resp.text.split('\n'), delimiter=delimiter) if r and r[0][0] != '#' ] header = [ 'Record_ID', 'parent_category', 'name', 'description', 'required_tags' ] PREFIXES = makePrefixes('owl', 'skos', 'ILX', 'ILXREPLACE', 'definition') graph = makeGraph('measures', prefixes=PREFIXES) class nat(rowParse): def Record_ID(self, value): print(value) self.old_id = value self._id = ILXREPLACE(value) def parent_category(self, value): self.super_old_id = value self.super_id = ILXREPLACE(value) def name(self, value): self.hidden = value self.label = value.replace('_', ' ')
def main():
    """Convert the neurolex CSV dump into a ttl ontology and write it out.

    Reads neurolex_full.csv, drops columns containing no data, runs
    basicConvert over the remaining rows, prints the various id sets
    collected on the returned state, resolves 'uni' ids against the SCR
    graph, writes the graph (with curie conversion), then drops into an
    interactive shell (embed).
    """
    # TODO extracly only NLX only with superclasses for fariba
    # FIXME there is an off by 1 error
    #nlxdb = get_nlxdb()
    scr_graph = get_scr()
    # every object of oboInOwl:hasDbXref in the SCR graph as a plain python value
    existing_xrefs = set([s_o[1].toPython() for s_o in scr_graph.subject_objects(rdflib.term.URIRef('http://www.geneontology.org/formats/oboInOwl#hasDbXref'))])
    ONT_PATH = 'http:/github.com/tgbugs/nlxeol/'  # NOTE(review): single slash after 'http:' looks like a typo -- confirm intended
    filename = 'neurolex_basic'
    PREFIXES = {'to':'do',
                'NLXWIKI':'http://neurolex.org/wiki/',
                'ILX':'http://uri.interlex.org/base/ilx_',
                'ilx':'http://uri.interlex.org/base/',
                'owl':'http://www.w3.org/2002/07/owl#',
                'skos':'http://www.w3.org/2004/02/skos/core#',
                'OBOANN':'http://ontology.neuinfo.org/NIF/Backend/OBO_annotation_properties.owl#',
                'oboInOwl':'http://www.geneontology.org/formats/oboInOwl#',
               }
    ontid = ONT_PATH + filename + '.ttl'
    new_graph = makeGraph(filename, PREFIXES)
    new_graph.add_ont(ontid,
                      'Conversion of the neurolex database to ttl',
                      'Neurolex dump',
                      'This file is automatically generated from nlxeol/process_csv.py',
                      TODAY)
    #with open('neuron_data_curated.csv', 'rt') as f:
    with open('neurolex_full.csv', 'rt') as f:
        rows = [r for r in csv.reader(f)]
    # transpose, keep only columns where some cell differs from the header
    # cell (i.e. the column has data), then transpose back
    new_rows = [list(r) for r in zip(*[c for c in zip(*rows) if any([r for r in c if r != c[0]])])]
    no_data_cols = set(rows[0]) - set(new_rows[0])
    print(no_data_cols)
    #header[header.index('Phenotypes:_ilx:has_location_phenotype')] = 'Phenotypes'
    # convert the header names so that ' ' is replaced with '_'
    state = basicConvert(new_graph, new_rows, existing_xrefs)
    #state = convertCurated(new_graph, new_rows)
    #embed()
    #return
    #_ = [print(i) for i in sorted([datetime.strptime(t, '%d %B %Y') for _ in state._set_ModifiedDate for t in _.split(',') if _])]
    _ = [print(i) for i in sorted(state.chebi_ids)]
    _ = [print(i) for i in sorted(state.drugbank_ids)]
    _ = [print(i) for i in sorted(state.t3db_ids)]
    _ = [print(i) for i in sorted(state.uni_ids)]
    _ = [print(i) for i in sorted(state.bad_ids)]
    #_ = [print(i) for i in sorted(state.failed_resolution)]
    #_ = [print(i) for i in sorted(state._set_LocationOfAxonArborization)]
    # deal with unis TODO needs to be embeded in state.Id or something incase of reference
    unis = {None:[]}    # resolved-curie -> uni id; the None bucket collects unresolved ids
    lookup_new_id = {}  # uni id -> resolved curie, or None when unresolved
    for id_ in sorted([_.split(':')[1] for _ in state.uni_ids]):
        matches = [_ for _ in scr_graph.triples((None, None, rdflib.Literal(id_)))]
        if len(matches) > 1:
            # ambiguous: report and record nothing
            print(matches)
        elif not matches:
            unis[None].append(id_)
            lookup_new_id[id_] = None
        else:
            match = matches[0]
            # subject uri tail like .../SCR_000000 becomes the curie SCR:000000
            unis[match[0].rsplit('/',1)[1].replace('_',':')] = id_
            lookup_new_id[id_] = match[0].rsplit('/',1)[1].replace('_',':')
    #return
    new_graph.write(convert=True)
    embed()
def main():
    """Rebuild all neurons from the existing phenotype ontologies and export them.

    Loads the /tmp/*.ttl source graphs, wires up the shared graphBase state,
    partitions neuron classes into defined (equivalentClass) and undefined
    ones, re-instantiates everything as Neuron objects, then writes the
    'output' ontology and the python form via WRITEPYTHON.  Ends in embed()
    for interactive inspection.
    """
    # load in our existing graph
    # note: while it would be nice to allow specification of phenotypes to be decoupled
    # from insertion into the graph... maybe we could enable this, but it definitely seems
    # to break a number of nice features... and we would need the phenotype graph anyway
    EXISTING_GRAPH = rdflib.Graph()
    sources = ('/tmp/NIF-Neuron-Phenotype.ttl',
               '/tmp/NIF-Neuron-Defined.ttl',
               '/tmp/NIF-Neuron.ttl',
               '/tmp/NIF-Phenotype-Core.ttl',
               '/tmp/NIF-Phenotypes.ttl',
               '/tmp/hbp-special.ttl')
    for file in sources:
        EXISTING_GRAPH.parse(file, format='turtle')
    EXISTING_GRAPH.namespace_manager.bind(
        'ILXREPLACE', makePrefixes('ILXREPLACE')['ILXREPLACE'])
    #EXISTING_GRAPH.namespace_manager.bind('PR', makePrefixes('PR')['PR'])
    PREFIXES = makePrefixes(
        'owl',
        'skos',
        'PR',
        'UBERON',
        'NCBITaxon',
        'ILXREPLACE',
        'ilx',
        'ILX',
        'NIFCELL',
        'NIFMOL',
    )
    # wire the shared graphBase state the Neuron classes read from
    graphBase.core_graph = EXISTING_GRAPH
    graphBase.out_graph = rdflib.Graph()
    graphBase._predicates = getPhenotypePredicates(EXISTING_GRAPH)
    g = makeGraph('merged',
                  prefixes={k: str(v) for k, v in EXISTING_GRAPH.namespaces()},
                  graph=EXISTING_GRAPH)
    # direct subclasses of the neuron class
    reg_neurons = list(
        g.g.subjects(rdflib.RDFS.subClassOf, g.expand(NIFCELL_NEURON)))
    # transitive-closure subclasses via sparql
    tc_neurons = [
        _ for (_, ) in g.g.query('SELECT DISTINCT ?match WHERE {?match rdfs:subClassOf+ %s}' % NIFCELL_NEURON)
    ]
    def_neurons = g.get_equiv_inter(NIFCELL_NEURON)
    # subclasses that have no equivalentClass definition
    nodef = sorted(set(tc_neurons) - set(def_neurons))
    MeasuredNeuron.out_graph = rdflib.Graph()
    Neuron.out_graph = rdflib.Graph()
    mns = [MeasuredNeuron(id_=n) for n in nodef]
    dns = [Neuron(id_=n) for n in sorted(def_neurons)]
    #dns += [Neuron(*m.pes) if m.pes else m.id_ for m in mns]
    dns += [Neuron(*m.pes) for m in mns if m.pes]
    # reset everything for export
    Neuron.out_graph = rdflib.Graph()
    ng = makeGraph('output', prefixes=PREFIXES, graph=Neuron.out_graph)
    Neuron.existing_pes = {}  # reset this as well because the old Class references have vanished
    dns = [Neuron(*d.pes) for d in set(dns)]  # TODO remove the set and use this to test existing bags?
    from neuron_lang import WRITEPYTHON
    WRITEPYTHON(sorted(dns))
    ng.add_ont(ILXREPLACE('defined-neurons'), 'Defined Neurons', 'NIFDEFNEU',
               'VERY EXPERIMENTAL', '0.0.0.1a')
    ng.add_node(ILXREPLACE('defined-neurons'), 'owl:imports',
                'http://ontology.neuinfo.org/NIF/ttl/NIF-Phenotype-Core.ttl')
    ng.add_node(ILXREPLACE('defined-neurons'), 'owl:imports',
                'http://ontology.neuinfo.org/NIF/ttl/NIF-Phenotypes.ttl')
    ng.write()
    # classes that ended up with only a single triple in the output (suspicious)
    bads = [
        n for n in ng.g.subjects(rdflib.RDF.type, rdflib.OWL.Class)
        if len(list(ng.g.predicate_objects(n))) == 1
    ]
    embed()
def g(filename):
    """Parse *filename* as turtle and return it wrapped in an anonymous makeGraph."""
    parsed = rdflib.Graph().parse(filename, format='turtle')
    return makeGraph('', graph=parsed)
def attachPrefixes(*prefixes, graph=None):
    """Bind the curie *prefixes* onto *graph* by wrapping it in a makeGraph."""
    expanded = makePrefixes(*prefixes)
    return makeGraph('', prefixes=expanded, graph=graph)
def backend_refactor_values():
    """Return the mapping of deprecated BFO 1.x / OBO_REL / ro URIs to their
    BFO 2.0 / RO replacements.

    Keys and values are given as curies or full URIs; both sides are expanded
    with ug.check_thing before being returned.
    """
    uri_reps_lit = {
        # from https://github.com/information-artifact-ontology/IAO/blob/master/docs/BFO%201.1%20to%202.0%20conversion/mapping.txt
        'http://www.ifomis.org/bfo/1.1#Entity': 'BFO:0000001',
        'BFO1SNAP:Continuant': 'BFO:0000002',
        'BFO1SNAP:Disposition': 'BFO:0000016',
        'BFO1SNAP:Function': 'BFO:0000034',
        'BFO1SNAP:GenericallyDependentContinuant': 'BFO:0000031',
        'BFO1SNAP:IndependentContinuant': 'BFO:0000004',
        'BFO1SNAP:MaterialEntity': 'BFO:0000040',
        'BFO1SNAP:Quality': 'BFO:0000019',
        'BFO1SNAP:RealizableEntity': 'BFO:0000017',
        'BFO1SNAP:Role': 'BFO:0000023',
        'BFO1SNAP:Site': 'BFO:0000029',
        'BFO1SNAP:SpecificallyDependentContinuant': 'BFO:0000020',
        'BFO1SPAN:Occurrent': 'BFO:0000003',
        'BFO1SPAN:ProcessualEntity': 'BFO:0000015',
        'BFO1SPAN:Process': 'BFO:0000015',
        'BFO1SNAP:ZeroDimensionalRegion': 'BFO:0000018',
        'BFO1SNAP:OneDimensionalRegion': 'BFO:0000026',
        'BFO1SNAP:TwoDimensionalRegion': 'BFO:0000009',
        'BFO1SNAP:ThreeDimensionalRegion': 'BFO:0000028',
        'http://purl.org/obo/owl/OBO_REL#bearer_of': 'RO:0000053',
        'http://purl.org/obo/owl/OBO_REL#inheres_in': 'RO:0000052',
        'ro:has_part': 'BFO:0000051',
        'ro:part_of': 'BFO:0000050',
        'ro:has_participant': 'RO:0000057',
        'ro:participates_in': 'RO:0000056',
        'http://purl.obolibrary.org/obo/OBI_0000294': 'RO:0000059',
        'http://purl.obolibrary.org/obo/OBI_0000297': 'RO:0000058',
        'http://purl.obolibrary.org/obo/OBI_0000300': 'BFO:0000054',
        'http://purl.obolibrary.org/obo/OBI_0000308': 'BFO:0000055',

        # more bfo
        'BFO1SNAP:SpatialRegion': 'BFO:0000006',
        'BFO1SNAP:FiatObjectPart': 'BFO:0000024',
        'BFO1SNAP:ObjectAggregate': 'BFO:0000027',
        'BFO1SNAP:Object': 'BFO:0000030',
        #'BFO1SNAP:ObjectBoundary'  # no direct replacement, only occurs in unused
        #'BFO1SPAN:ProcessAggregate'  # was not replaced, could simply be a process itself??
        #'BFO1SNAP:DependentContinuant'  # was not replaced

        # other
        #'ro:participates_in'  # above
        #'ro:has_participant'  # above
        #'ro:has_part',  # above
        #'ro:part_of',  # above
        #'ro:precedes'  # unused and only in inferred
        #'ro:preceded_by'  # unused and only in inferred
        #'ro:transformation_of'  # unused and only in inferred
        #'ro:transformed_into'  # unused and only in inferred

        'http://purl.org/obo/owl/obo#inheres_in': 'RO:0000052',
        'http://purl.obolibrary.org/obo/obo#towards': 'RO:0002503',
        'http://purl.org/obo/owl/pato#towards': 'RO:0002503',
        'http://purl.obolibrary.org/obo/pato#inheres_in': 'RO:0000052',
        'BIRNLEX:17': 'RO:0000053',  # is_bearer_of
        'http://purl.obolibrary.org/obo/pato#towards': 'RO:0002503',
        'ro:adjacent_to': 'RO:0002220',
        'ro:derives_from': 'RO:0001000',
        'ro:derives_into': 'RO:0001001',
        'ro:agent_in': 'RO:0002217',
        'ro:has_agent': 'RO:0002218',
        'ro:contained_in': 'RO:0001018',
        'ro:contains': 'RO:0001019',
        'ro:located_in': 'RO:0001025',
        'ro:location_of': 'RO:0001015',
        'ro:has_proper_part': 'NIFRID:has_proper_part',
        'ro:proper_part_of': 'NIFRID:proper_part_of',  # part of where things are not part of themsevles need to review
    }
    ug = makeGraph('', prefixes=makePrefixes('ro', 'RO', 'BIRNLEX', 'NIFRID',
                                             'BFO', 'BFO1SNAP', 'BFO1SPAN'))
    # expand both sides of every pair into full identifiers
    ureps = {
        ug.check_thing(k): ug.check_thing(v)
        for k, v in uri_reps_lit.items()
    }
    return ureps