def special_case(self):  # FIXME wow is this bad
    """Apply special-case tagset rewrites for info tags, controlled-vocabulary
    tags, and a table of known workflow tag combinations.

    Returns the rewritten (frozen) tagset, or None when the tagset is
    invalid / fails its special-case test.  Side effect: accumulates the
    tags that triggered a rewrite into ``self.warnings``.
    """
    # handle info_tags
    badset = set(OntId(t) if t.startswith('RRIDCUR:') and ' ' not in t  # *shakes fist angrily*
                 else t
                 for t in self.badset)
    tagset = frozenset(badset | self.tagset - {None})
    # strip info tags, remembering each one we stripped
    for itag in self.infotags:
        if itag in tagset:
            tagset = frozenset((t for t in tagset if t != itag))
            self.warnings |= frozenset({itag})
    # swap controlled-vocabulary tags for their canonical replacement
    for cv_tag, tag in self.cvtags.items():
        if cv_tag in tagset:
            # BUGFIX: frozenset((cv_tag)) iterated the *characters* of the
            # OntId string (missing trailing comma), so the cv tag was never
            # removed and single characters were injected into the tagset.
            tagset = tagset - frozenset({cv_tag})
            tagset |= frozenset({tag})
            self.warnings |= frozenset({cv_tag})

    def rrid_safe_suffix(_):
        # True when the single curator RRID's suffix is NOT among the
        # curator tag suffixes (i.e. it is safe to treat as a real RRID)
        hah = next(iter(self.RRIDcurator))  # FIXME multicase ...
        return hah.suffix not in set(
            t.suffix for t in self.anno_part_instances[OntId('workflow:tagCurator')])

    scs = {
        # TODO make sure that ONLY the workflow tags are used to retrieve values
        # so that annotations with an RRID: tag that are/were unresolved have to
        # to into a special handline pipeline FIXME this implementation is NOT sufficient
        ('workflow:RRID',): (rrid_safe_suffix, ('workflow:RRID', 'RRIDCUR:Missing')),
        ('workflow:RRID', 'RRIDCUR:Validated'):
        (lambda x: True, ('RRIDCUR:Validated',)),  # rrid deal with elsewhere
        ('workflow:RRID', 'RRIDCUR:Unresolved'):  # super confusing ...
        (lambda x: True, ('RRIDCUR:GiveMeAReason',)),
        ('workflow:RRIDscibot', 'RRIDCUR:Unresolved'):
        (lambda x: True, ('RRIDCUR:Unresolved',)),
        #('workflow:RRID',): ('workflow:RRID', 'RRIDCUR:Missing'),
        # can't use this yet due to the bad RRID:Missing and friends issues
        #('',): ('',),
    }
    special_cases = {}
    for special, (test, case) in scs.items():
        special_cases[frozenset((OntId(s) for s in special))] = (
            test, frozenset((OntId(c) for c in case)))

    if tagset in special_cases:
        test, new_tagset = special_cases[tagset]
        if test(tagset):
            self.warnings |= tagset
            return new_tagset
        else:
            return None
    elif self.warnings:  # itags
        return tagset
def __new__(cls, *args, **kwargs):
    """One-time class setup: normalize the tag tables to OntId and install a
    boolean property on the class for every tag in this class's prefix.

    Runs only on the first instantiation (guarded by ``cls._setup_done``).
    """
    if not cls._setup_done:
        cls.infotags = frozenset((OntId(t) for t in cls.info_tags))  # just to make it even more confusing
        cls.cvtags = {OntId(cvt): OntId(t) for cvt, t in cls.cv_tags.items()}  # just to make it even more confusing
        # every distinct tag across the terminal tagsets plus the info tags
        for tag in set(t for s in chain(cls.terminal_tagsets, (cls.infotags,)) for t in s):
            if tag.prefix == cls.tag_prefix:
                # default arg t=tag binds the current tag (avoids the
                # late-binding closure pitfall in this loop)
                @property
                def getter(self, t=tag):
                    # True when the tag appears in either the good or bad set
                    return t in set(self.tagset) or t in set(self.badset)
                setattr(cls, tag.suffix, getter)
        cls._setup_done = True
    return object.__new__(cls)
def organ_term(self, dataset_id):
    """Return the organ terms recorded for *dataset_id* as a tuple of OntId,
    or None when the dataset is unknown or has no organ_term value."""
    record = self._lookup(dataset_id)
    if not record:
        return None
    raw = record.organ_term
    if not raw:
        return None
    # organ_term is a space-separated list of curies; drop empty tokens
    return tuple(OntId(token) for token in raw.split(' ') if token)
def _triples(self):
    """Yield (s, p, o) triples for every field of every record, skipping
    empty values and main-ids that leak in as alt-ids, and coercing objects
    through the graph's checker where possible."""
    for id_, rec in self.records.items():
        for field, value in rec:
            #print(field, value)
            if not value:  # don't add empty edges
                # FIXME issue with False literal
                logd.debug(f'caught an empty value on field {id_} {field}')
                continue
            if field != 'id' and (str(value).replace('_', ':') in id_
                                  or str(value) in id_):
                #if field == 'alt_id' and id_[1:] == value:
                if field != 'old_id':
                    # NOTE(review): in the collapsed source the placement of
                    # this `continue` relative to the inner `if` is ambiguous;
                    # here old_id values that match id_ still produce a triple
                    # while other matching fields are skipped — confirm intent.
                    logd.debug(
                        f'caught a mainid appearing as altid {field} {value}'
                    )
                    continue
            s, p, o = make_triple(id_, field, value)
            if not isinstance(o, rdflib.URIRef):
                try:
                    if o.startswith(
                            ':'
                    ) and ' ' in o:  # not a compact repr AND starts with a : because humans are insane
                        o = ' ' + o
                    o = self._graph.check_thing(o)
                except (AttributeError, KeyError, ValueError) as e:
                    o = rdflib.Literal(o)  # trust autoconv
            #yield OntId(s), OntId(p), self._graph.check_thing(o)  # FIXME OntId(p) breaks rdflib rdf:type -> a
            yield OntId(s), p, o
def __init__(self, path=config.organ_html_path, organs_sheet=None):  # FIXME bad passing in organs
    """Load (or build and cache) the award→organ mapping.

    On first run, scrapes via self.overview() and writes two JSON caches;
    afterwards, reads the caches.  An optional organs_sheet supplies manual
    overrides which take precedence over the sourced mapping.
    """
    self.path = path
    if not self.cache.exists():
        # no cache yet: build the normalized data and persist both caches
        self.overview()
        with open(self.cache, 'wt') as f:
            json.dump(self.normalized, f)
        with open(self.old_cache, 'wt') as f:
            json.dump(self.former_to_current, f)
    else:
        with open(self.cache, 'rt') as f:
            self.normalized = json.load(f)
        with open(self.old_cache, 'rt') as f:
            self.former_to_current = json.load(f)
    if organs_sheet is not None:
        self._org = organs_sheet
        bc = self._org.byCol
        # key preference: award, then award_manual, else None;
        # value is the space-separated organ_term column parsed to OntIds
        self.manual = {award if award else
                       (award_manual if award_manual else None):
                       [OntId(t) for t in organ_term.split(' ') if t]
                       for award, award_manual, organ_term in
                       zip(bc.award, bc.award_manual, bc.organ_term)
                       if organ_term}
    else:
        self.manual = {}
    # invert normalized (organ -> awards) into award -> organ
    self.sourced = {v: k for k, vs in self.normalized.items() for v in vs}
    self.award_to_organ = {**self.sourced, **self.manual}  # manual override
def tag_row(row: list, url: url_for = None, tier_level: int = 0) -> list:
    ''' Tag each element in the row; atag the curies & ptag everything else

    Elements that parse as an OntId become curie links; anything else is
    rendered as plain (optionally linked) text, indented by tier_level.
    Only the first element of the row receives the indent.
    '''
    tagged_row = []
    spaces = nbsp * 8 * tier_level
    if not row:
        return row
    if not isinstance(row, list):
        row = [row]
    for i, element in enumerate(row):
        if i > 0:
            spaces = ''  # indent only the first element
        try:
            oid = OntId(element)
            # TODO: should this have spaces?
            tagged_curie = atag(oid.iri, oid.curie)
            tagged_row.append(tagged_curie)
        except Exception:  # was a bare except: — don't swallow KeyboardInterrupt/SystemExit
            if url:
                tagged_row.append(ptag(spaces + atag(url, element)))
            else:
                tagged_row.append(spaces + element)
    return tagged_row
def connected(start):
    """Query connectivity for *start* and group the resulting edges by
    blank-node neuron.

    Returns (start term, the predicate linking start to a neuron (if any),
    {bnode: sorted [(pred, obj)]}, {bnode: {obj: [preds]}}).
    """
    log.debug(start)
    blob = data_sgd.neurons_connectivity(start)#, limit=9999)
    edges = blob['edges']
    neurons = {}
    types = {}
    start_type = None
    sc = OntId(start).curie
    for e in edges:
        s, p, o = e['sub'], e['pred'], e['obj']
        if p == 'operand':
            continue
        if s.startswith('_:'):  # only blank nodes are neurons here
            if s not in neurons:
                neurons[s] = []
                types[s] = {}
            otp = OntTerm(p)
            oto = OntTerm(o)
            neurons[s].append((otp, oto))
            if o == sc:
                # the predicate that connects the start node to this neuron
                start_type = otp
            if oto not in types[s]:
                types[s][oto] = []
            types[s][oto].append(otp)
    # removed unused local `rows = []` (dead code)
    for v in neurons.values():
        v.sort()
    return OntTerm(start), start_type, neurons, types
class Basic(LocalNameManager): brain = OntId('UBERON:0000955')#, label='brain') #projection = Phenotype(ilxtr.ProjectionPhenotype, ilxtr.hasProjectionPhenotype) #intrinsic = Phenotype(ilxtr.InterneuronPhenotype, ilxtr.hasProjectionPhenotype) # FIXME naming projection = Phenotype(ilxtr.ProjectionPhenotype, ilxtr.hasCircuitRolePhenotype) intrinsic = Phenotype(ilxtr.InterneuronPhenotype, ilxtr.hasCircuitRolePhenotype)
def asOboTerm(self, *args, id='tgbugsTODO'):
    """Convert this term to an obo_io Term with the given obo id.

    Pulls definition, synonyms (exact/broad/related), and existing-id xrefs
    from the backing graph.  Raises ValueError when the term is missing a
    definition or when the underlying attributes are malformed.
    """
    graph = self.query.services[0].graph
    ilx_id = OntId(self).u
    s = list(graph[:ilxtr.hasIlxId:ilx_id])[0]
    if s != ilx_id:
        # the graph knows this term under a different subject; refetch as that
        self = self.__class__(s)
        self.fetch()
    try:
        term = oio.Term(id=id, name=self.label)
        if not self.definition:
            # typo fix: was 'misisng'
            raise ValueError(f'{self} is missing a definition!')
        defxrefs = list(graph[s:OntId('ilx.anno.hasDefinitionSource:').u:])
        term.add(oio.TVPair(tag='def', text=self.definition, xrefs=defxrefs))
        for synonym in (self.synonyms if isinstance(self.synonyms, tuple)
                        else  # FIXME ontquery issue
                        (self.synonyms,)):
            term.add(oio.TVPair(tag='synonym', text=synonym, typedef='EXACT'))
        # FIXME SIGH brokenness of OntTerm.__call__ return types
        ps = ('ilx.anno.hasBroadSynonym:', 'oboInOwl:hasBroadSynonym')
        broads = [v for k, vs in self(*ps).items() if k in ps for v in vs]
        for bs in broads:
            term.add(oio.TVPair(tag='synonym', text=bs, typedef='BROAD'))
        ps = ('ilx.anno.hasRelatedSynonym:', 'oboInOwl:hasRelatedSynonym')
        relateds = [v for k, vs in self(*ps).items() if k in ps for v in vs]
        for rs in relateds:
            term.add(oio.TVPair(tag='synonym', text=rs, typedef='RELATED'))
        for iri in graph[s:ilxtr.hasExistingId]:
            term.add(oio.TVPair(tag='xref', name=OntId(iri).curie,))
    except (AttributeError, TypeError) as e:
        raise ValueError(self) from e
    return term
def tagsub(self, tag):
    """Map a raw annotation tag to its workflow OntId, or None when the tag
    is not part of the workflow (or not allowed for this user).

    Results (including failures, cached as None) are memoized in
    ``self._tag_cache``.
    """
    try:
        if tag in self._tag_cache:
            tag = self._tag_cache[tag]
            if tag is None:
                return tag
        else:
            subbed = OntId(tag)
            self._tag_cache[tag] = subbed
            tag = subbed
    except (OntId.BadCurieError, OntId.UnknownPrefixError) as e:
        # unparseable tag: remember the failure so we don't retry
        self._tag_cache[tag] = None
        return None
    if tag.prefix == 'RRID':
        if self.user == self.robot_user:
            return OntId('workflow:RRIDscibot')
        else:
            # TODO suffix check?
            return OntId('workflow:RRID')
    elif tag.prefix == 'DOI':
        return OntId('workflow:DOI')
    elif tag.prefix == 'PMID':
        return OntId('workflow:PMID')
    elif tag.prefix == 'RRIDCUR':
        api = self.anno_part_instances
        # curation tags are only valid if the issuing user is allowed to use them
        if self.user == self.robot_user and tag not in api[OntId('workflow:tagScibot')]:
            return None  # return None to guarantee invalid tagset w/o errors
        elif self.user != self.robot_user and tag not in api[OntId('workflow:tagCurator')]:
            return None
        else:
            return tag
    else:
        return None
def __new__(cls, *args, **kwargs):
    """Construct via OntId.__new__ and stash the constructor arguments so the
    instance can later be reconstructed/pickled with the same inputs."""
    # removed a debugging leftover: a bare `except:` wrapping the call with a
    # breakpoint() before re-raising — never appropriate in production code
    self = OntId.__new__(cls, *args, **kwargs)
    self._args = args
    self._kwargs = kwargs
    return self
def ncbigenemapping(may_need_ncbigene_added):
    """Look up mouse (taxon 10090) gene names at NCBI and map them to
    NCBIGene curies.

    Side effects: hits the NCBI web API, caches raw responses under
    resources/genesearch, and prints progress/diagnostics.
    Returns (name→curie mapping, gene ids to add, error tuples).
    """
    #urlbase = 'https://www.ncbi.nlm.nih.gov/gene/?term=Mus+musculus+'
    urlbase = ('https://www.ncbi.nlm.nih.gov/gene?term='
               '({gene_name}[Gene%20Name])%20AND%20{taxon_suffix}[Taxonomy%20ID]&'
               'report=xml')
    urls = [urlbase.format(gene_name=n, taxon_suffix=10090)
            for n in may_need_ncbigene_added]
    done2 = {}
    for u in urls:
        if u not in done2:  # dedupe before fetching
            print(u)
            done2[u] = requests.get(u)
    base = auth.get_path('resources') / 'genesearch'
    if not base.exists():
        base.mkdir()
    # cache the raw responses on disk keyed by url-quoted id
    for resp in done2.values():
        fn = OntId(resp.url).quoted
        with open(base / fn, 'wb') as f:
            f.write(resp.content)
    so_much_soup = [(resp.url, BeautifulSoup(resp.content, 'lxml'))
                    for resp in done2.values()]
    trees = []
    for i, (url, soup) in enumerate(so_much_soup):
        # NCBI's xml report is embedded inside a <pre> block
        pre = soup.find_all('pre')
        if pre:
            for p in pre[0].text.split('\n\n'):
                if p:
                    tree = etree.fromstring(p)
                    trees.append((url, tree))
        else:
            print('WAT', urls[i])
    dimension = 'ilxtr:hasExpressionPhenotype'
    errors = []
    to_add = []
    mapping = {}
    for url, tree in trees:
        taxon = tree.xpath('//Org-ref//Object-id_id/text()')[0]
        geneid = tree.xpath('//Gene-track_geneid/text()')[0]
        genename = tree.xpath('//Gene-ref_locus/text()')[0]
        if genename in may_need_ncbigene_added and taxon == '10090':
            # emit ready-to-paste Phenotype source lines
            print(f'{genename} = Phenotype(\'NCBIGene:{geneid}\', {dimension!r}, label={genename!r}, override=True)')
            to_add.append(geneid)
            mapping[genename] = f'NCBIGene:{geneid}'
        else:
            errors.append((geneid, genename, taxon, url))
    print(errors)
    _ = [print('NCBIGene:' + ta) for ta in to_add]
    #wat.find_all('div', **{'class':'rprt-header'})
    #wat.find_all('div', **{'class':'ncbi-docsum'})
    return mapping, to_add, errors
class RegistrySource(Source):
    """Source stub for the SciCrunch Registry; data is provided elsewhere so
    loadData is a no-op and validate accepts everything."""
    iri = OntId('SCR:005400')

    @classmethod
    def loadData(cls):
        # data comes from the registry service, not a local file
        pass

    @classmethod
    def validate(cls, tup):
        # nothing to validate for this source
        return tuple()
def process_nodes(j, root, direction, verbose):
    """Build tree-building lookup tables from a scigraph neighbors blob *j*.

    Returns (nodes, objects, subjects, names, pnames, edgerep, root, roots,
    leaves, pair_rel).  ``direction`` is 'OUTGOING', 'INCOMING', or 'BOTH'
    and controls which way edges are flipped for the tree.
    """
    nodes = {n['id']: n['lbl'] for n in j['nodes']}
    nodes[CYCLE] = CYCLE  # make sure we can look up the cycle
    edgerep = ['{} {} {}'.format(nodes[e['sub']], e['pred'], nodes[e['obj']])
               for e in j['edges']]
    # note that if there are multiple relations between s & p then last one wins
    # sorting by the predicate should help keep it a bit more stable
    pair_rel = {(e['sub'], e['obj'])
                if direction == 'OUTGOING' else
                (e['obj'], e['sub']):
                e['pred'] + '>'
                if direction == 'OUTGOING' else
                '<' + e['pred']
                for e in sorted(j['edges'], key=lambda e: e['pred'])}
    objects = defaultdict(list)  # note: not all nodes are objects!
    for edge in j['edges']:
        objects[edge['obj']].append(edge['sub'])
    subjects = defaultdict(list)
    for edge in j['edges']:
        subjects[edge['sub']].append(edge['obj'])
    if root not in nodes and root is not None:
        # root was given as an iri/curie not present verbatim; normalize it
        root = OntId(root).curie
    if direction == 'OUTGOING':  # flip for the tree
        objects, subjects = subjects, objects
    elif direction == 'BOTH':  # FIXME BOTH needs help!
        # NOTE(review): debugging pprint output left in for the BOTH case
        from pprint import pprint
        pprint(subjects)
        pprint(objects)
        pass
    # something is wrong with how we are doing subClassOf, see PAXRAT: INCOMING
    if root is not None:
        subjects[root] = ['ROOT']
        subjects = pruneOutOfTree(subjects, verbose)
        subjects[root] = []  # FIXME if OUTGOING maybe??
    ss, so = set(subjects), set(objects)
    roots = so - ss
    leaves = ss - so
    if root is None:
        if len(roots) == 1:
            root = next(iter(roots))
        else:
            # multiple roots: synthesize a parent for all of them
            root = '*ROOT*'
            nodes[root] = 'ROOT'
            objects[root] = list(roots)
    names = {nodes[k]: [nodes[s] for s in v]
             for k, v in objects.items()}  # children don't need filtering
    pnames = {nodes[k]: [nodes[s] for s in v] for k, v in subjects.items()}
    return (nodes, objects, subjects, names, pnames, edgerep,
            root, roots, leaves, pair_rel)
def query(self, curie=None, iri=None, label=None, term=None, search=None,
          **kwargs):
    """Query the parent service and translate rdf predicates to plain keys.

    Yields a dict accumulating the translated predicate/object pairs.
    """
    # right now we only support exact matches to labels FIXME
    translate = {rdfs.label: 'label',
                 rdfs.subClassOf: 'subClassOf',
                 rdf.type: 'type',
                 NIFRID.definingCitation: 'definingCitation',}
    out = {}
    for p, o in super().query(OntId(curie=curie, iri=iri), label, term, search):
        # FIXME should not have to URIRef at this point ...
        p = translate[p]
        if isinstance(o, rdflib.Literal):
            o = o.toPython()
        out[p] = o
        # NOTE(review): this yields the SAME dict object once per pair,
        # progressively filled in — a consumer holding earlier yields sees
        # later mutations.  Looks like the yield was meant to sit outside
        # the loop; confirm against callers before changing.
        yield out
def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # PLEASE DO NOT PUT PMIDs as external ids!!! # FIXME idlib PMID(thing) urg the regex state machine is so simple ;_; if self.id.startswith('PMID:'): log.warning('PMIDs should never be External IDs!') self._term = fake self.s = OntId(self.id).URIRef return self._term = OntTerm(self.id) self.s = self._term.URIRef
def normalize_term(term, prefix=''):
    """Split *term* on the embedded separator; when a curie is attached,
    normalize it via OntId, otherwise look curies up by label.

    Returns a row: [prefixed term, curie(s)...].
    """
    label, *maybe_curie = term.split('\u1F4A9')
    #print(repr(label))
    if maybe_curie:
        (raw_curie,) = maybe_curie
        return [prefix + label, OntId(raw_curie).curie]
    return [prefix + label] + get_curies_from_scigraph_label_query(label)
def query_scigraph_for_curies(self, label: str,
                              prefixes: List[str] = None) -> list:
    """Return the curies of terms whose label exactly matches *label*,
    searched per prefix.

    :param label: term label to look up (stripped before querying).
    :param prefixes: curie prefixes to try; defaults to ['UBERON', 'ILX'].
    """
    # BUGFIX: the default was a mutable list literal shared across calls;
    # use the None-sentinel idiom instead.
    if prefixes is None:
        prefixes = ['UBERON', 'ILX']
    curies = []
    # return []
    # for prefix in prefixes:  # BUG: prefixes cant be used because it gives random errors if the prefix isn't exact
    for prefix in prefixes:
        neighbors = [v.OntTerm
                     for v in OntTerm.query(label=label.strip(), prefix=prefix)]
        if not neighbors:
            continue
        for neighbor in neighbors:
            oid = OntId(neighbor)
            curies.append(oid.curie)
    return curies
def triples_objects_multi(self):
    """Yield one triple per value for every multi-valued key present in the
    blob; 'external' values become OntId URIRefs, everything else resolves
    through the local context."""
    for key in self.objects_multi:
        if key in self.blob:
            values = self.blob[key]
            for value in values:
                if key == 'external':
                    o = OntId(value).URIRef
                else:
                    value = value.replace(
                        ' ', '-')  # FIXME require no spaces in internal ids
                    o = self.context[value]
                yield self.s, readable[key], o
def get_itrips(self):
    """Collect owl:imports triples for every scigraph ontology iri.

    Fetches isDefinedBy neighbors for each iri concurrently, then flattens
    the edges into (importer, 'owl:imports', imported) URIRef triples.
    Caches the sorted result on self.itrips.
    """
    results = self.get_scigraph_onts()
    iris = sorted(set(r['iri'] for r in results))
    # one neighbors request per iri, run asynchronously
    gin = lambda i: (i, self.sgg.getNeighbors(i,
                                              relationshipType='isDefinedBy',
                                              direction='OUTGOING'))
    nodes = Async()(deferred(gin)(i) for i in iris)
    imports = [(i, *[(e['obj'], 'owl:imports', e['sub'])
                     for e in n['edges']])
               for i, n in nodes if n]
    self.itrips = sorted(set(tuple(rdflib.URIRef(OntId(e).iri) for e in t)
                             for i, *ts in imports if ts
                             for t in ts))
    return self.itrips
def fromRdf(cls, uri, graph, context=None):
    """Build an instance from the triples about *uri* in *graph*.

    rdf:type maps to class=External; rdfs:label maps to name; other
    predicates use their path tail as the key.  Multi-valued keys are
    collected into lists.
    """
    oid = OntId(uri)
    id = oid.curie
    blob = {'id': id}
    for p, o in graph[uri]:
        if p == rdf.type:
            key = 'class'
            value = 'External'
        else:
            if p == rdfs.label:
                key = 'name'
            else:
                _, key = p.rsplit('/', 1)
            if isinstance(o, rdflib.Literal):
                value = o.toPython()
            elif isinstance(o, rdflib.URIRef):
                oid = OntId(o)
                if oid.prefix == 'local':
                    value = oid.suffix
                elif oid.prefix == 'apinatomy':  # FIXME hrm?
                    value = oid.suffix
                else:
                    value = oid.curie  # FIXME external is tricky
                    log.warning(f'{oid!r}')
            else:
                # BUGFIX: previously fell through silently, so a BNode (or any
                # non-Literal/non-URIRef) object reused the *previous*
                # iteration's `value`.  Fail loudly instead, consistent with
                # the sibling fromRdf implementation.
                raise NotImplementedError(f'{o}')
        if key in cls.objects_multi:
            if key in blob:
                blob[key].append(value)
            else:
                blob[key] = [value]
        else:
            blob[key] = value
    return cls(blob, context)
def get_curies_from_scigraph_label_query(
        label: str, prefixes: List[str] = None) -> list:
    """Return the curies of terms whose label exactly matches *label*,
    de-duplicated across the given prefixes.

    :param label: term label to look up (stripped before querying).
    :param prefixes: curie prefixes to try; defaults to
        ['UBERON', 'ILX', 'PAXRAT'].
    """
    # BUGFIX: the default was a mutable list literal shared across calls;
    # use the None-sentinel idiom instead.
    if prefixes is None:
        prefixes = ['UBERON', 'ILX', 'PAXRAT']
    curies = set()
    for prefix in prefixes:
        # TODO: if not stripped the label will return nothing. Seems to be trailing spaces
        neighbors = [v.OntTerm
                     for v in OntTerm.query(label=label.strip(), prefix=prefix)]
        if not neighbors:
            continue
        for neighbor in neighbors:
            curies.add(OntId(neighbor).curie)
    return list(curies)
def triples(self, subject=None):
    """Yield rdf triples for this obo tag/value pair.

    'id' yields a class declaration; tags known to obo_tag_to_ttl yield a
    single (subject, predicate, object) triple with tag-specific object
    coercion.  Other tags yield nothing.
    """
    if subject is None:
        subject = rdflib.BNode()
    if self.tag == 'id':
        yield id_fix(self.value), rdf.type, owl.Class
    elif self.tag in obo_tag_to_ttl:
        predicate = obo_tag_to_ttl[self.tag]
        if self.tag == 'def':
            #value = self._value.text.replace('"','\\"')
            value = self._value.text
            obj = rdflib.Literal(value)
        elif self.tag == 'synonym':
            value = self._value.text.lower()
            obj = rdflib.Literal(value)
        elif self.tag == 'is_a':
            if self._value.target == self._value.DANGLING:  # we dangling
                value = self._value.target_id
            else:
                value = id_fix(self._value.target.id_.value)
            obj = rdflib.URIRef(value)
        elif self.tag == 'name':
            value = self.value.lower()  # capitalize only proper nouns as needed
            obj = rdflib.Literal(value)
        elif self.tag == 'xref':
            value = self.value
            # FIX: '\:' is an invalid escape sequence (SyntaxWarning on
            # modern Python); '\\:' is the same two-character string,
            # i.e. strip obo-escaped colons
            if '\\:' in value:
                value = value.replace('\\:', ':')
            try:
                obj = OntId(value).URIRef
            except (OntId.UnknownPrefixError, OntId.BadCurieError) as e:
                obj = rdflib.Literal(value)  # FIXME
        else:
            value = self.value
            if '\\:' in value:
                value = value.replace('\\:', ':')
            obj = rdflib.URIRef(value)
        yield subject, predicate, obj
def id_fix(value):
    """ fix @prefix values for ttl

    Normalizes an obo-style identifier to a curie and returns its URIRef:
    KSC_M ids pass through untouched; known obo prefixes get 'obo:';
    birnlex/nlx get 'NIFSTD:'; MESH uses ':'-joined parts; everything else
    falls into the default (empty-prefix) namespace.
    """
    if value.startswith('KSC_M'):
        pass
    else:
        value = value.replace(':', '_')
        # idiom: startswith accepts a tuple — one call instead of an or-chain
        if value.startswith(('ERO', 'OBI', 'GO', 'UBERON', 'IAO')):
            value = 'obo:' + value
        elif value.startswith(('birnlex', 'nlx')):
            value = 'NIFSTD:' + value
        elif value.startswith('MESH'):
            value = ':'.join(value.split('_'))
        else:
            value = ':' + value
    return OntId(value).URIRef
def process_nodes(j, root, direction, verbose):
    """Build tree-building lookup tables from a scigraph neighbors blob *j*.

    NOTE(review): this is an older/simpler variant of process_nodes that
    treats BOTH like OUTGOING and does not compute pair_rel.
    Returns (nodes, objects, subjects, names, pnames, edgerep, root, roots,
    leaves).
    """
    nodes = {n['id']: n['lbl'] for n in j['nodes']}
    nodes[CYCLE] = CYCLE  # make sure we can look up the cycle
    edgerep = ['{} {} {}'.format(nodes[e['sub']], e['pred'], nodes[e['obj']])
               for e in j['edges']]
    objects = defaultdict(list)  # note: not all nodes are objects!
    for edge in j['edges']:
        objects[edge['obj']].append(edge['sub'])
    subjects = defaultdict(list)
    for edge in j['edges']:
        subjects[edge['sub']].append(edge['obj'])
    if root not in nodes and root is not None:
        # root was given as an iri/curie not present verbatim; normalize it
        root = OntId(root).curie
    if direction == 'OUTGOING' or direction == 'BOTH':  # flip for the tree
        # FIXME BOTH needs help!
        objects, subjects = subjects, objects
    # something is wrong with how we are doing subClassOf, see PAXRAT: INCOMING
    if root is not None:
        subjects[root] = ['ROOT']
        subjects = pruneOutOfTree(subjects, verbose)
        subjects[root] = []  # FIXME if OUTGOING maybe??
    ss, so = set(subjects), set(objects)
    roots = so - ss
    leaves = ss - so
    if root is None:
        if len(roots) == 1:
            root = next(iter(roots))
        else:
            # multiple roots: synthesize a parent for all of them
            root = '*ROOT*'
            nodes[root] = 'ROOT'
            objects[root] = list(roots)
    names = {nodes[k]: [nodes[s] for s in v]
             for k, v in objects.items()}  # children don't need filtering
    pnames = {nodes[k]: [nodes[s] for s in v] for k, v in subjects.items()}
    return (nodes, objects, subjects, names, pnames, edgerep,
            root, roots, leaves)
def submit_to_obofile(self, of, prefix, id_range):
    """Convert the terms pending submission into obo Terms with fresh ids in
    *id_range* under *prefix*, and add them to obo file *of*.

    The next id starts just above the highest already-used suffix within the
    range (or at the range minimum when none are used).
    """
    terms = [ro.id for ro in self.to_submit()]
    [t.fetch() for t in terms]
    #obo_terms = [t.asOboTerm(id=f'tgbugsTODO{i}') for i, t in enumerate(terms)]
    id_min, id_max = id_range
    # suffixes already allocated inside (id_min, id_max)
    over_under = [int(b.id_.value.suffix) for b in of.Terms.values()
                  if not isinstance(b, list) and ':' in b.id_.value
                  and id_min < int(OntId(b.id_.value).suffix) < id_max]
    if over_under:
        id_start = max(over_under) + 1
    else:
        id_start = id_min
    # TODO padding and prefix detect etc.
    obo_terms = [t.asOboTerm(id=f'{prefix}:{id_start + i}')
                 for i, t in enumerate(terms)]
    of.add(*obo_terms)
def fromRdf(cls, uri, graph, context=None):
    """Build an instance from the triples about *uri* in *graph*.

    rdf:type other than owl:NamedIndividual maps to a 'class' key from the
    iri tail; rdfs:label maps to name; other predicates use their path tail
    as the key.  Multi-valued keys are collected into lists.  Unexpected
    object node kinds raise NotImplementedError.
    """
    _, id = uri.rsplit('/', 1)
    blob = {'id': id}
    for p, o in graph[uri]:
        if p == rdf.type:
            if o != owl.NamedIndividual:
                key = 'class'
                _, value = o.rsplit('/', 1)
            else:
                continue  # TODO s rdf:type apinatomy:External ??
        else:
            if p == rdfs.label:
                key = 'name'
            else:
                _, key = p.rsplit('/', 1)
            if isinstance(o, rdflib.Literal):
                value = o.toPython()
            elif isinstance(o, rdflib.URIRef):
                oid = OntId(o)
                if oid.prefix == 'local':
                    value = oid.suffix
                elif oid.prefix == 'apinatomy':  # FIXME hrm?
                    value = oid.suffix
                else:
                    value = oid.curie  # FIXME external is tricky
                    log.warning(f'{oid!r}')
            else:
                # e.g. a BNode object: no sensible value, fail loudly
                raise NotImplementedError(f'{o}')
        if key in cls.objects_multi:
            if key in blob:
                blob[key].append(value)
            else:
                blob[key] = [value]
        else:
            blob[key] = value
    return cls(blob, context)
), # e.g. delayed saccad # behaviorl task structure # reward type # structure in the sensory environment that they need to process # working memory is more like a study target ## technique oc(BFO['0000015']), olit(BFO['0000015'], rdfs.label, 'process'), oc(ilxtr.technique, BFO['0000015']), # FIXME technique/ olit(ilxtr.technique, rdfs.label, 'technique'), olit(ilxtr.technique, NIFRID.synonym, 'method'), olit( ilxtr.technique, definition, 'A repeatable process that is constrained by some prior information.'), (ilxtr.technique, ilxtr.hasTempId, OntId('HBP_MEM:0000000')), # NOTE: not all techniques have primary participants, especially in the case of composite techniques oc_(ilxtr.technique, restriction(ilxtr.hasExecutor, ilxtr.executor)), oc_( rdflib.BNode(), oECN( intersectionOf( BFO['0000015'], restN(hasParticipant, restN(ilxtr.hasAspect, asp.nonLocal))), # vs hasExpAspect intersectionOf( BFO['0000015'], restN(ilxtr.processHasAspect, restN(ilxtr.hasContext, BFO['0000002'])))), # FIXME still doesn't get the binding right intersectionOf(
if p.exists(): return send_from_directory(p.parent.as_posix(), p.name) log.critical(f'{resources}/sawg.org has not been published') return send_from_directory(resources.as_posix(), 'sawg.org') #return hfn.htmldoc( #atag(url_for('route_sparc_view'), 'Terms by region or atlas'), '<br>', #atag(url_for('route_sparc_index'), 'Index'), #title='SPARC Anatomical terms', styles=["p {margin: 0px; padding: 0px;}"], #metas = ({'name':'date', 'content':time()},), #) return app # for now hardcode test_terms = [OntId('UBERON:0001759'), OntId('UBERON:0000388'), OntId('UBERON:0001629'), OntId('UBERON:0001723'), OntId('UBERON:0001737'), OntId('UBERON:0001930'), OntId('UBERON:0001989'), OntId('UBERON:0001990'), OntId('UBERON:0002024'), OntId('UBERON:0002440'), OntId('UBERON:0003126'), OntId('UBERON:0003708'), OntId('UBERON:0009050'), OntId('UBERON:0011326'), OntId('FMA:6240'), OntId('FMA:6243'),
def triples_external(self):
    """Yield an rdf:type triple for each external id recorded in the blob."""
    if 'externals' not in self.blob:
        return
    for external_id in self.blob['externals']:
        yield self.s, rdf.type, OntId(external_id).URIRef