def catalog_extras(fetch=False):
    path = Path(auth.get_path('ontology-local-repo'), 'ttl')
    cat = (path / 'catalog-v001.xml').as_posix()
    with open((path / '../catalog-extras').as_posix(), 'rt') as ce, open(cat, 'rt') as c:
        clines = c.readlines()
        celines = ce.readlines()

    if clines[-2] != celines[-1]:
        with open(cat, 'wt') as f:
            f.writelines(clines[:-1] + celines + clines[-1:])
    else:
        print(tc.blue('INFO:'), 'extras already added to catalog doing nothing')

    if fetch:
        print(tc.blue('INFO:'), 'fetching extras')

        def fetch_and_save(url, loc):
            resp = requests.get(url)
            saveloc = (path / loc).as_posix()
            if resp.ok:
                with open(saveloc, 'wb') as f:
                    f.write(resp.content)
                print(tc.blue('INFO:'), f'{url:<60} written to {loc}')
            else:
                print(tc.red('WARNING:'), f'failed to fetch {url}')

        Async()(deferred(fetch_and_save)(url, loc)
                for line in celines
                for _, _, _, url, _, loc, _ in (line.split('"'),))
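# A minimal sketch (hypothetical helper, not part of the original) of the line
# format the Async comprehension above assumes: each catalog-extras entry is an
# XML catalog line with exactly three quoted attribute values, so
# line.split('"') yields seven fields with the remote url at index 3 and the
# local save location at index 5.
def _demo_catalog_line_unpack():
    line = ('<uri id="User Entered Import Resolution" '
            'name="http://example.org/extra.ttl" uri="extra.ttl"/>\n')
    _, _, _, url, _, loc, _ = line.split('"')
    assert url == 'http://example.org/extra.ttl'
    assert loc == 'extra.ttl'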
def do_patch(patch_config, local_base):
    repo_base = Path(local_base)
    config_path = Path(patch_config)
    with open(patch_config, 'rt') as f:
        config = yaml.safe_load(f)

    for patchset, patches in config.items():
        for patch, target_remote in patches.items():
            patchfile = config_path.parent / patch
            if not patchfile.exists():
                raise FileNotFoundError(
                    f'Cannot find {patchfile} specified in {config_path}')

            target = target_remote['target']
            targetfile = repo_base / target
            if 'remote' in target_remote and not targetfile.exists():
                remote = target_remote['remote']
                resp = requests.get(remote)
                with open(targetfile, 'wb') as f:
                    f.write(resp.content)

            print(tc.blue('INFO: patching'), patchset, patchfile, targetfile)
            try:
                out = subprocess.check_output(
                    ['patch', '-p1', '-N', '-i', patchfile.as_posix()],
                    cwd=repo_base.as_posix(),
                    stderr=subprocess.STDOUT).decode().rstrip()
                print(out)
                yield targetfile.as_posix()
            except subprocess.CalledProcessError as e:
                # FIXME this is not failing on other types of patching errors!
                if e.returncode > 1:  # 1 means already applied
                    print(e.stdout.decode())
                    raise e
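# A usage sketch for do_patch (paths and names hypothetical). The config shape
# is inferred from the loop above: patchset -> patch filename ->
# {'target': ..., optional 'remote': ...}. Note that do_patch is a generator,
# so it must be consumed for any patching to happen.
#
#   nifstd:                                    # patchset
#     fix-labels.patch:                        # patch file next to the config
#       target: ttl/generated/labels.ttl       # file to patch, under local_base
#       remote: https://example.org/labels.ttl # fetched if target is missing
#
def _demo_do_patch():
    for patched in do_patch('patches/patches.yaml', '/path/to/NIF-Ontology'):
        print('patched:', patched)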
def _ontology_local_repo(self):
    try:
        stated_repo = Path(self.config['ontology_local_repo'])
    except (KeyError, TypeError, FileNotFoundError):
        stated_repo = Path('/dev/null/does-not-exist')

    maybe_repo = self._maybe_repo
    if stated_repo.exists():
        return stated_repo
    elif maybe_repo.exists():
        return maybe_repo
    else:
        maybe_start = Path(__file__).parent.parent.parent.absolute()
        maybe_base = maybe_start
        fsroot = Path('/')
        while maybe_base != fsroot:
            maybe_repo = maybe_base / self.ontology_repo
            if maybe_repo.exists():
                log.info(tc.blue('INFO:') +
                         f' Ontology repository found at {maybe_repo}')
                return maybe_repo
            else:
                maybe_base = maybe_base.parent
        else:  # only runs if the loop hits the filesystem root without returning
            log.warning(tc.red('WARNING:') +
                        f' No repository found in any parent directory of {maybe_start}')

        return Path('/dev/null/does-not-exist')  # seems reasonable ...
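# A condensed, standalone sketch (names hypothetical) of the upward search the
# property above performs: walk from a starting directory toward the
# filesystem root and return the first parent that contains the repo.
def _find_repo_upward(start, repo_name):
    from pathlib import Path
    start = Path(start).absolute()
    for base in (start, *start.parents):
        candidate = base / repo_name
        if candidate.exists():
            return candidate
    return None  # caller picks a sentinel such as /dev/null/does-not-exist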
def test_file(self, module_path=module_path, stem=stem, fname=fname):
    try:
        print(tc.ltyellow('IMPORTING:'), module_path)
        module = import_module(module_path)  # this returns the submod
        self._modules[module_path] = module
        if hasattr(module, '_CHECKOUT_OK'):
            print(tc.blue('MODULE CHECKOUT:'), module, module._CHECKOUT_OK)
            setattr(module, '_CHECKOUT_OK', True)
            #print(tc.blue('MODULE'), tc.ltyellow('CHECKOUT:'), module, module._CHECKOUT_OK)
    #except BaseException as e:
        # FIXME this does not work because collected tests cannot be uncollected
        #suffix = fname.split('__', 1)[-1]
        #for mn in dir(self):
            #if suffix in mn:
                #old_func = getattr(self, mn)
                #new_func = pytest.mark.xfail(raises=ModuleNotFoundError)(old_func)
                #setattr(self, mn, new_func)
        #raise e
    finally:
        post_load()
def fetch_and_save(url, loc):
    resp = requests.get(url)
    saveloc = (path / loc).as_posix()  # `path` is a closure variable from the enclosing scope
    if resp.ok:
        with open(saveloc, 'wb') as f:
            f.write(resp.content)
        print(tc.blue('INFO:'), f'{url:<60} written to {loc}')
    else:
        print(tc.red('WARNING:'), f'failed to fetch {url}')
def test_file(self, module_path=module_path, stem=stem):
    try:
        print(tc.ltyellow('IMPORTING:'), module_path)
        module = import_module(module_path)  # this returns the submod
        self._modules[module_path] = module
        if hasattr(module, '_CHECKOUT_OK'):
            print(tc.blue('MODULE CHECKOUT:'), module, module._CHECKOUT_OK)
            setattr(module, '_CHECKOUT_OK', True)
            #print(tc.blue('MODULE'), tc.ltyellow('CHECKOUT:'), module, module._CHECKOUT_OK)
    finally:
        pass
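# Both test_file variants above pin loop variables via keyword defaults
# (module_path=module_path); a minimal sketch (hypothetical) of why: without
# the defaults, every generated test would see the *last* value of
# module_path, because Python closures bind names late.
def _make_tests(paths):
    from importlib import import_module
    tests = []
    for module_path in paths:
        def test_file(module_path=module_path):  # bound now, not at call time
            import_module(module_path)
        tests.append(test_file)
    return tests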
def chain_to_typed_chain(chain, g, func):
    # duh...
    #pprint(chain)
    for s, o in zip(chain, chain[1:]):
        # TODO deal with reversed case
        s, o = s.u, o.u
        p = None
        #print(s, o)
        printq(s, o)
        for p in get_linkers(s, o, g, func):
            #print(tc.yellow(p))
            #yield (s, edge_to_symbol(p), o)
            # NB: yield from emits the three elements flat, one at a time;
            # the commented line above yielded them as a single tuple
            yield from (s, edge_to_symbol(p), o)

        if not p:  # no forward linkers found, try the reversed direction
            for rp in get_linkers(o, s, g, func):
                print(tc.blue(rp))
                yield from (s, edge_to_symbol(rp, rev=True), o)
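# A tiny sketch (hypothetical input) of the zip(chain, chain[1:]) idiom above,
# which walks a chain as consecutive (subject, object) pairs:
def _demo_pairwise():
    chain = ['a', 'b', 'c', 'd']
    assert list(zip(chain, chain[1:])) == [('a', 'b'), ('b', 'c'), ('c', 'd')]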
def pprint_meta(meta, print_iri=True):
    if print_iri:
        if 'curie' in meta:
            print(meta['curie'])
        else:
            p = qname(meta['iri'])
            if p == meta['iri']:
                for iri, short in scigPrint.shorten.items():
                    if iri in p:
                        p = p.replace(iri, short + ':')
                        break
            print()
            print(tc.blue(p))

    for k, v in sorted(meta.items()):
        if k in ('curie', 'iri'):
            continue
        for iri, short in scigPrint.shorten.items():
            if iri in k:
                k = k.replace(iri, short + ':')
                break
        if v is not None:
            shift = 10 if len(k) <= 10 else (20 if len(k) <= 20 else 30)
            base = ' ' * 4 + f'{k:<{shift}}'
            if isinstance(v, list):
                if len(v) > 1:
                    print(base, '[')
                    _ = [print(' ' * 8 + scigPrint.sv(_, 8, 8)) for _ in v]
                    print(' ' * 4 + ']')
                elif len(v) == 1:
                    asdf = v[0]
                    print(base, scigPrint.sv(asdf, len(base) + 1, len(base) - 3))
                else:
                    pass
            else:
                print(base, scigPrint.sv(v, len(base) + 1, len(base) - 3))
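# A small sketch (hypothetical keys) of the three-column alignment rule
# pprint_meta uses: keys pad to 10, 20, or 30 characters by length, so values
# line up in one of three columns.
def _demo_key_shift():
    for k in ('id', 'definingCitation', 'hasGenbankAccessionNumber1234'):
        shift = 10 if len(k) <= 10 else (20 if len(k) <= 20 else 30)
        print(' ' * 4 + f'{k:<{shift}}' + '| value starts here')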
def printe(*args, **kwargs):
    print(*(tc.blue(str(a)) for a in args), **kwargs)
def would_you_like_to_know_more_question_mark():

    # resolving differences between classes
    more_ids = set((
        'http://uri.neuinfo.org/nif/nifstd/readable/ChEBIid',
        'http://uri.neuinfo.org/nif/nifstd/readable/GOid',
        'http://uri.neuinfo.org/nif/nifstd/readable/MeshUid',
        'http://uri.neuinfo.org/nif/nifstd/readable/PMID',
        'http://uri.neuinfo.org/nif/nifstd/readable/UmlsCui',
        'http://uri.neuinfo.org/nif/nifstd/readable/bamsID',
        'http://uri.neuinfo.org/nif/nifstd/readable/bonfireID',
        'http://uri.neuinfo.org/nif/nifstd/readable/cell_ontology_ID',
        'http://uri.neuinfo.org/nif/nifstd/readable/definingCitationID',
        'http://uri.neuinfo.org/nif/nifstd/readable/definingCitationURI',
        'http://uri.neuinfo.org/nif/nifstd/readable/emapMouseStageDataID',
        'http://uri.neuinfo.org/nif/nifstd/readable/emapMouseStageDiagramID',
        'http://uri.neuinfo.org/nif/nifstd/readable/externalSourceId',
        'http://uri.neuinfo.org/nif/nifstd/readable/externalSourceURI',
        'http://uri.neuinfo.org/nif/nifstd/readable/gbifID',
        'http://uri.neuinfo.org/nif/nifstd/readable/gbifTaxonKeyID',
        'http://uri.neuinfo.org/nif/nifstd/readable/gene_Ontology_ID',
        #'http://uri.neuinfo.org/nif/nifstd/readable/hasExternalSource',
        'http://uri.neuinfo.org/nif/nifstd/readable/hasGenbankAccessionNumber',
        'http://uri.neuinfo.org/nif/nifstd/readable/imsrStandardStrainName',
        'http://uri.neuinfo.org/nif/nifstd/readable/isReplacedByClass',
        'http://uri.neuinfo.org/nif/nifstd/readable/jaxMiceID',
        'http://uri.neuinfo.org/nif/nifstd/readable/ncbiTaxID',
        'http://uri.neuinfo.org/nif/nifstd/readable/neuronamesID',
        'http://uri.neuinfo.org/nif/nifstd/readable/nifID',
        'http://uri.neuinfo.org/nif/nifstd/readable/sao_ID',
        'http://uri.neuinfo.org/nif/nifstd/readable/umls_ID',
        'http://www.geneontology.org/formats/oboInOwl#id',
    ))

    outside = []
    eee = {}
    resolver_not_ilx_only_but_not_in_scigraph = set()  # resources.ttl
    _res = Graph().parse((gitf / 'NIF-Ontology/ttl/resources.ttl').as_posix(),
                         format='turtle')
    reslookup = {uri: [l] for uri, l in _res.subject_objects(rdfs.label)}
    for uri in chain(h_uris, resolver_not_ilx_only):
        if 'uri.neuinfo.org' in uri:
            try:
                meta = sgg.getNode(uri.toPython())['nodes'][0]['meta']
                asdf = {hng.qname(k): v for k, v in meta.items() if k in more_ids}
            except TypeError:
                resolver_not_ilx_only_but_not_in_scigraph.add(uri)  # resources.ttl ;)
                if uri in reslookup:  # no differentia
                    asdf = False
                else:
                    asdf = False
                    print('WTF', uri)
            if asdf:
                #print(uri, asdf)
                eee[uri] = asdf
                for l in asdf.values():
                    for e in l:
                        outside.append(e)

    outside_dupes = [v for v, c in Counter(outside).most_common() if c > 1]
    eee_dupes = {k: v for k, v in eee.items()
                 if anyMembers(outside_dupes, *(e for l in v.values() for e in l))}

    #for uri, meta in sorted(eee_dupes.items(), key=lambda a: sorted(a[1].values())):
        #print(uri.toPython(), sorted((e.replace('PMID: ', 'PMID:'), k)
                                     #for k, l in meta.items() for e in l))

    # attempt to deal with label mappings
    iexisting = defaultdict(set)
    iiexisting = {}
    for i, existing in zip(datal('ilx'), datal('iri')):
        #if 'uri.neuinfo.org' in existing:
        if 'interlex.org' not in existing and 'neurolex.org' not in existing:
            iexisting[i].add(URIRef(existing))
            iiexisting[URIRef(existing)] = i

    iexisting = {**iexisting}

    _ilabs = {k: l for k, l in zip(datal('ilx'), datal('label'))}

    def inner(iri):
        resp = sgv.findById(iri)
        if resp is not None:
            l = resp['labels']
        else:
            l = []  #_ilabs[iiexisting[iri]] + '** already in ilx **']
            #print('trouble?', iri)  # ilx only
        return iri, l

    #labs = {k: v[0] if v else '<--NO-LABEL-->'
            #for k, v in Async()(deferred(inner)(id_)
                                #for id_ in chain(h_uris,
                                                 #(e for s in iexisting.values() for e in s)))}
    labs = {k: v[0] if v else '<--NO-LABEL-->'
            for k, v in Async()(deferred(inner)(id_) for id_ in h_uris)}
    ilabs = {k: l.lower() for k, l in zip(datal('ilx'), datal('label'))}
    iilabs = {v: k for k, v in ilabs.items()}
    assert len(ilabs) == len(iilabs)
    missing_map = {k: iilabs[v.lower()] for k, v in labs.items()
                   if v and v.lower() in iilabs}  # XXX this is not valid

    missing_existing = {i: [m, *iexisting[i]]
                        for m, i in missing_map.items() if i in iexisting}

    missing_equivs = {next(iter(iexisting[i])): i
                      for m, i in missing_map.items() if i in iexisting}

    eid = NIFRID.externalSourceId.toPython()
    ded = owl.deprecated.toPython()
    # SP: -> swissprot vs uniprot
    mmr = []
    proto_mmr_1_to_1 = {}
    arrr = defaultdict(set)
    uniprot_iuphar = set()
    for uri, ilx_frag in {**missing_equivs, **missing_map}.items():
        uri = URIRef(uri)
        try:
            meta = sgg.getNode(uri.toPython())['nodes'][0]['meta']
        except TypeError:
            # just ignore these, they are ilx only :/
            meta = {}
        if eid in meta:
            src = meta[eid][0]
            if src.startswith('SP:'):
                src = tc.yellow(src.replace('SP:', 'http://www.uniprot.org/uniprot/'))
            #elif src.startswith('IUPHAR:'):
                #pass
            #else:
                #src = 'TODO'
        elif ded in meta and meta[ded]:
            src = tc.red('ded ')
        else:
            src = 'TODO'
        val = labs[uri] if uri in labs else _ilabs[ilx_frag] + ' **'
        if uri in eee:
            differentia = str(eee[uri])
            for v in eee[uri].values():
                for e in v:
                    arrr[e].add(uri)
                    if 'SP:' in e or 'IUPHAR:' in e:
                        uniprot_iuphar.add(uri)
        else:
            differentia = ''

        if uri in _ilx and uri in all_uris:
            ruri = SGG[hng.qname(uri)]
            ruri = tc.blue(f'{ruri:<60}')
        else:
            ruri = uri
            ruri = f'{ruri:<60}'

        v = ' '.join((f'{val:<60}', src, ruri, ilxb[ilx_frag], differentia))
        mmr.append(v)
        proto_mmr_1_to_1[uri] = v
        src = None

    arrr = {**arrr}
    arrr_not_1_to_1 = {k: v for k, v in arrr.items() if len(v) > 1}
    #arrr_n11_uris = set((u.toPython() for v in arrr_not_1_to_1.values() for u in v))
    # NB: set.union(*...) raises TypeError if arrr_not_1_to_1 is empty
    arrr_n11_uris = set.union(*arrr_not_1_to_1.values())
    mmr_1_to_1 = {k: v for k, v in proto_mmr_1_to_1.items()
                  if k not in arrr_n11_uris}
    no_uniprot = {k: v for k, v in proto_mmr_1_to_1.items()
                  if k not in uniprot_iuphar}
    arrr_n11_text = '\n'.join(f'{k:<15} {sorted(_.toPython() for _ in v)}'
                              for k, v in arrr_not_1_to_1.items())

    mmr.sort()
    mmr_text = '\n'.join(mmr)

    mmr_1_to_1_text = '\n'.join(sorted(mmr_1_to_1.values()))

    no_uniprot_text = '\n'.join(sorted(no_uniprot.values()))
def main():
    for filename in ('mbaslim', 'hbaslim', 'paxinos-rat-labels', 'waxholm-rat-labels'):
        filepath = gitf / 'NIF-Ontology/ttl/generated/parcellation' / (filename + '.ttl')
        dir_ = filepath.parent.as_posix()
        print(dir_)
        file_commit = subprocess.check_output(
            ['git', 'log', '-n', '1', '--pretty=format:%H', '--', filepath.name],
            cwd=dir_,
            stderr=subprocess.DEVNULL).decode().rstrip()
        graph = rdflib.Graph().parse(filepath.as_posix(), format='ttl')
        g = makeGraph('', graph=graph)

        annos = defaultdict(set)
        anno_trips = defaultdict(set)
        for triple, predicate_objects in annotation.parse(graph=graph):
            for a_p, a_o in predicate_objects:
                annos[a_p, a_o].add(triple)
                anno_trips[triple].add((a_p, a_o))

        anno_trips = {k: v for k, v in anno_trips.items()}

        for lifted_triple in restriction.parse(graph=graph):
            graph.add(lifted_triple)

        out_header = 'label|abbrev|curie|superPart curie\n'
        out = []
        editions_header = 'edition|label|abbrev|curie\n'
        editions = []
        for s in graph.subjects(rdf.type, owl.Class):
            rdfsLabel = next(graph.objects(s, rdfs.label))
            try:
                prefLabel = next(graph.objects(s, skos.prefLabel))
            except StopIteration:
                print(tc.red('WARNING:'),
                      f'skipping {s} {rdfsLabel} since it has no prefLabel')
                continue
            # TODO are there cases where we need to recapitulate what we are doing for abbrevs?
            syns = sorted(graph.objects(s, NIFRID.synonym))
            abbrevs = sorted(graph.objects(s, NIFRID.abbrev))  # FIXME paxinos has more than one
            try:
                if annos:
                    if len(abbrevs) > 1:
                        print(tc.blue('INFO:'), g.qname(s),
                              repr(prefLabel.value), 'has multiple abbrevs',
                              [a.value for a in abbrevs])
                    # prefer latest
                    current_edition = ''
                    for a in abbrevs:
                        for a_p, edition in anno_trips[s, NIFRID.abbrev, a]:
                            if a_p == ilxtr.literalUsedBy:
                                if current_edition < edition:
                                    current_edition = edition
                                    abbrev = a
                else:
                    abbrev = abbrevs[0]
            except IndexError:
                abbrev = ''
            try:
                superPart = next(graph.objects(s, ilxtr.labelPartOf))
            except StopIteration:
                superPart = ''

            out.append(f'{prefLabel}|{abbrev}|{g.qname(s)}|{g.qname(superPart)}')

            if annos:
                #asdf = {'ed':{'label':,'abbrev':,'curie':}}
                asdf = defaultdict(dict)
                triple = s, skos.prefLabel, prefLabel
                eds = anno_trips[triple]
                for a_p, a_o in eds:
                    asdf[a_o]['curie'] = g.qname(s)
                    asdf[a_o]['label'] = prefLabel
                for syn in graph.objects(s, NIFRID.synonym):
                    triple = s, NIFRID.synonym, syn
                    eds = anno_trips[triple]
                    for a_p, a_o in eds:
                        asdf[a_o]['curie'] = g.qname(s)
                        if 'label' in asdf[a_o]:
                            print(tc.red('WARNING:'),
                                  f'{a_o} already has a label "{asdf[a_o]["label"]}" for "{syn}"')
                        asdf[a_o]['label'] = syn
                for abbrev in graph.objects(s, NIFRID.abbrev):
                    triple = s, NIFRID.abbrev, abbrev
                    eds = anno_trips[triple]
                    #print('aaaaaaaaaaa', g.qname(s), )
                    for a_p, a_o in eds:
                        asdf[a_o]['curie'] = g.qname(s)
                        if 'abbrev' in asdf[a_o]:
                            print(tc.red('WARNING:'),
                                  f'{a_o} already has an abbrev "{asdf[a_o]["abbrev"]}" for "{abbrev}"')
                        asdf[a_o]['abbrev'] = abbrev

                #print(asdf)
                for ed, kwargs in sorted(asdf.items()):
                    if 'abbrev' not in kwargs:
                        print('Skipping', ed, 'for\n', kwargs)
                        continue
                    editions.append('{ed}|{label}|{abbrev}|{curie}'.format(
                        ed=g.qname(ed), **kwargs))

        with open('/tmp/' + filename + f'-{file_commit[:8]}.psv', 'wt') as f:
            f.write(out_header + '\n'.join(sorted(out, key=labelkey)))
        if editions:
            with open('/tmp/' + filename + f'-editions-{file_commit[:8]}.psv', 'wt') as f:
                f.write(editions_header + '\n'.join(sorted(editions, key=edkey)))
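# A small sketch (hypothetical helper) of consuming the pipe-separated files
# main writes, given the 'label|abbrev|curie|superPart curie' header above:
def _read_psv(path):
    with open(path, 'rt') as f:
        header, *rows = f.read().splitlines()
    cols = header.split('|')
    return [dict(zip(cols, row.split('|'))) for row in rows]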