def download_articles(self):
    """Download the text of every article in ``self.parent.articles``.

    Fans out ``self.download_article`` via ``utils.async_getter``; workers
    append into a process-safe Manager list passed through ``pass_data``.

    :return: Manager-backed list of downloaded results.
    """
    logger.info('')
    manager = multiprocessing.Manager()
    shared_results = manager.list()
    utils.async_getter(function_pointer=self.download_article,
                       input_list=self.parent.articles,
                       pass_data={"results": shared_results})
    return shared_results
def get_rss_feeds(self, num_feeds_max):
    """Download up to ``num_feeds_max`` RSS feeds and collect their articles.

    Feed URLs come from the module-level ``RSS_FEEDS`` list; each feed is
    handled by ``self.get_rss_feed``, which appends articles into a
    process-safe Manager list shared through ``pass_data``.

    :param num_feeds_max: Maximum number of feeds.
    :return: Articles in a list.
    """
    logger.info('num_feeds_max:%s' % num_feeds_max)
    manager = multiprocessing.Manager()
    collected_articles = manager.list()
    selected_feeds = RSS_FEEDS[:num_feeds_max]
    utils.async_getter(
        function_pointer=self.get_rss_feed,
        input_list=selected_feeds,
        pass_data={"results": collected_articles},
    )
    return collected_articles
def main():
    """Load pipe-delimited CSVs, enrich each row with a term lookup, embed.

    Reads every file under the methodsOntology ``to_be_integrated_in_NIF``
    directory, keeps a single header row, then appends the first hit's
    curie and label from ``sgv.findByTerm`` to each data row.
    """
    pattern = os.path.expanduser('~/git/methodsOntology-upstream/to_be_integrated_in_NIF/*')
    rows = []
    have_header = False
    for path in glob(pattern):
        with open(path, 'rt') as fh:
            parsed = list(csv.reader(fh, delimiter='|'))
            if have_header:
                parsed = parsed[1:]  # drop the repeated header row
            else:
                have_header = True
            rows.extend(parsed)

    def async_func(row):
        # Look up the term in column 2; take the first hit, if any.
        hits = sgv.findByTerm(row[2])
        if hits:
            top = hits[0]
            curie, label = top['curie'], top['labels'][0]
        else:
            curie, label = None, None
        return row + [curie, label]

    matched = [rows[0] + ['e_curie', 'e_label']] + async_getter(async_func, [(r,) for r in rows[1:]])
    embed()
def swanson():
    """ not really a parcellation scheme

    Build the Swanson 2014 partonomy ontology from ``resources/swanson_aligned.txt``.

    Parses the aligned text into (depth, name, citation, next-syn) tuples,
    looks up candidate UBERON curies via ``sgv.findByTerm``, then feeds the
    rows through a ``rowParse`` subclass to accumulate nodes, appendix
    metadata and parent/child edges, and finally writes classes, per-appendix
    part-of hierarchies and a node/edge json blob.

    :return: tuple of (ontology id, None).
    """
    source = 'resources/swanson_aligned.txt'
    ONT_PATH = GENERATED
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = makePrefixes('', 'ilx', 'owl', 'skos', 'NIFRID', 'ILXREPLACE')
    PREFIXES.update({
        #'':ontid + '/', # looking for better options
        'SWAN': interlex_namespace('swanson/nt/term'),
        'SWAA': interlex_namespace('swanson/nt/appendix'),
    })
    new_graph = makeGraph(filename, PREFIXES, writeloc=WRITELOC)
    new_graph.add_ont(ontid,
                      'Swanson brain partomies',
                      'Swanson 2014 Partonomies',
                      'This file is automatically generated from ' + source + '.' + NOTICE,
                      TODAY)
    # FIXME citations should really go on the ... anatomy? scheme artifact
    definingCitation = 'Swanson, Larry W. Neuroanatomical Terminology: a lexicon of classical origins and historical foundations. Oxford University Press, USA, 2014.'
    definingCitationID = 'ISBN:9780195340624'
    new_graph.add_trip(ontid, 'NIFRID:definingCitation', definingCitation)
    new_graph.add_trip(ontid, 'NIFRID:definingCitationID', definingCitationID)

    with open(source, 'rt') as f:
        lines = [l.strip() for l in f.readlines()]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    #fix for capitalization since this header is reused
    fixed = ' or '.join([
        ' ('.join([n.capitalize() for n in _.split(' (')])
        for _ in lines[635].lower().split(' or ')
    ]).replace('human', 'HUMAN')
    lines[635] = fixed

    # Parse each non-comment line: depth is encoded by runs of five dots,
    # "name (citation)" carries the citation, and "A (c1) or B (c2)" lines
    # emit an extra row tagged 'NEXT SYN' so the following row is treated as
    # a synonym of it.
    data = []
    for l in lines:
        if not l.startswith('#'):
            level = l.count('.' * 5)
            l = l.strip('.')
            if ' (' in l:
                if ') or' in l:
                    n1, l = l.split(') or')
                    area_name, citationP = n1.strip().split(' (')
                    citation = citationP.rstrip(')')
                    d = (level, area_name, citation, 'NEXT SYN')
                    data.append(d)
                    #print(tc.red(tc.bold(repr(d))))
                area_name, citationP = l.strip().split(' (')
                citation = citationP.rstrip(')')
            else:
                area_name = l
                citation = None
            d = (level, area_name, citation, None)
            #print(d)
            data.append(d)

    # Look up every area name; keep only curies containing 'UBERON',
    # first hit wins (None when nothing matched).
    results = async_getter(sgv.findByTerm, [(d[1], ) for d in data])
    #results = [None] * len(data)
    curies = [[r['curie'] for r in _ if 'UBERON' in r['curie']]
              if _ else []
              for _ in results]
    output = [_[0] if _ else None for _ in curies]
    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
        [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    class SP(rowParse):
        # NOTE(review): rowParse (defined elsewhere) appears to dispatch each
        # row's column values to the method named after the column header and
        # to expose the current row index as self._rowind — confirm.
        def __init__(self):
            self.nodes = defaultdict(dict)       # rowind -> node annotations
            self._appendix = 0                   # current appendix number
            self.appendicies = {}                # appendix number -> metadata
            self._last_at_level = {}             # depth -> rowind of last node seen
            self.names = defaultdict(set)        # "name citation" -> rowinds (dupe detection)
            self.children = defaultdict(set)     # parent rowind -> child rowinds
            self.parents = defaultdict(set)      # child rowind -> parent rowinds
            self.next_syn = False                # rowind whose synonym the next row is
            super().__init__(zoop)

        def Depth(self, value):
            # Depth is the first column, so per-row state is reset here.
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    # Close out the previous appendix before starting a new one.
                    if self._appendix:
                        self.appendicies[self._appendix]['children'] = dict(self.children)
                        self.appendicies[self._appendix]['parents'] = dict(self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':
                        # sentinel row appended to zoop; nothing more to record
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name': apname.capitalize(),
                        'type': self.citation.capitalize() if self.citation else None}
                    return
                else:
                    # depth-0 non-appendix rows carry the taxon in '[...]'
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix]['taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[self._appendix]['taxon'] = 'ANIMALIA'.capitalize()
                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                # this row is a synonym of an earlier row; annotate it and stop
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(self._rowind)
                else:
                    # disambiguate citation-less names with appendix + parent label
                    self.name += str(self._appendix) + self.nodes[self._last_at_level[self.depth - 1]]['label']
                #print(level, self.name)
                # can't return here because they are their own level
            # replace with actually doing something...
            self.nodes[self._rowind]['label'] = self.name
            self.nodes[self._rowind]['citation'] = self.citation
            self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except:
                    embed()
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            # Merge duplicate nodes: for every name+citation seen more than
            # once, keep the lowest rowind and redirect the rest to it.
            replace = {}
            for asdf in [sorted(n) for k, n in self.names.items() if len(n) > 1]:
                replace_with, to_replace = asdf[0], asdf[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r, rw in replace.items():
                #print(self.nodes[rw])
                o = self.nodes.pop(r)
                #print(o)

            # Rewrite edges in every appendix to point at the surviving ids.
            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    tp = [
        _ for _ in sorted([
            '{: <50}'.format(n['label']) + n['uberon'] if n['uberon'] else n['label']
            for n in sp.nodes.values()
        ])
    ]
    #print('\n'.join(tp))
    #print(sp.appendicies[1].keys())
    #print(sp.nodes[1].keys())

    # Emit one class per surviving node under the shared parent concept.
    nbase = PREFIXES['SWAN'] + '%s'
    json_ = {'nodes': [], 'edges': []}
    parent = ILXREPLACE('swansonBrainRegionConcept')
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, parent, label=anns['label'])
        new_graph.add_trip(nid, 'NIFRID:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl': anns['label'], 'id': 'SWA:' + str(node)})
        #if anns['uberon']:
            #new_graph.add_trip(nid, owl.equivalentClass, anns['uberon'])
        # issues arrise here...

    # One hasPart/partOf property pair per appendix, plus the hierarchy edges.
    for appendix, data in sp.appendicies.items():
        aid = PREFIXES['SWAA'] + str(appendix)
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_trip(aid, 'ilx:hasTaxonRank', data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        ahp = HASPART + str(appendix)
        apo = PARTOF + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items():  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_hierarchy(cid, ahp, pid)  # note hierarhcy inverts direction
                new_graph.add_hierarchy(pid, apo, cid)
                json_['edges'].append({
                    'sub': 'SWA:' + str(child),
                    'pred': apo,
                    'obj': 'SWA:' + str(parent)
                })

    new_graph.write()
    if False:
        # disabled debugging: render one tree per appendix mapping
        Query = namedtuple('Query', ['root', 'relationshipType', 'direction', 'depth'])
        mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?
        for i, n in enumerate(mapping):
            a, b = creatTree(*Query('SWA:' + str(n), 'ilx:partOf' + str(i + 1), 'INCOMING', 10), json=json_)
            print(a)
    return ontid, None
def make_nifga_graph(_doprint=False):
    """Build a replacement mapping for NIFGA classes from equivalentClass edges.

    Loads the NIFGA ontology from ``nifga_path``, collects its class curies,
    and for each one asks the graph service (``sgg``, defined elsewhere) for
    equivalentClass neighbors, preferring UBERON ids as replacements and
    falling back to term search via ``sgv`` — presumably SciGraph clients;
    confirm. ``manual`` (defined elsewhere) overrides everything.

    :param _doprint: when True, print details for putative replacements.
    :return: tuple (graph, matches, exact, internal_equivs, irbcs, replaced_by).
    """
    # use equivalent class mappings to build a replacement mapping
    g = rdflib.Graph()
    g.parse(nifga_path, format='turtle')
    getQname = g.namespace_manager.qname
    classes = sorted([
        getQname(_) for _ in g.subjects(RDF.type, OWL.Class) if type(_) is URIRef
    ])
    # qnames without a prefix belong to the NIFGA namespace
    curies = ['NIFGA:' + n for n in classes if ':' not in n]

    matches = async_getter(sgv.findById, [(c, ) for c in curies])

    replaced_by = {}        # NIFGA curie -> replacement id / 'NOREP' / None
    exact = {}              # curies with exactly one equivalent class
    internal_equivs = {}    # non-UBERON (internal) equivalents
    irbcs = {}              # curie -> (existing replacedBy, buck-passed target)

    def equiv(curie, label):
        # Resolve one curie; mutates the enclosing dicts as a side effect and
        # returns the list of equivalentClass neighbor nodes.
        if curie in manual:
            # hand-curated override wins outright
            replaced_by[curie] = manual[curie]
            return manual[curie]

        ec = sgg.getNeighbors(curie, relationshipType='equivalentClass')
        nodes = [n for n in ec['nodes'] if n['id'] != curie]
        if len(nodes) > 1:
            # multiple equivalents: record UBERON ones as replacements,
            # anything else as an internal equivalence
            #print('wtf node', [n['id'] for n in nodes], curie)
            for node in nodes:
                id_ = node['id']
                label_ = node['lbl']
                if id_.startswith('UBERON'):
                    if curie in replaced_by:
                        one = replaced_by[curie]
                        replaced_by[curie] = one, id_
                        print('WE GOT DUPES', curie, label, one, id_)  # TODO
                    else:
                        replaced_by[curie] = id_
                else:
                    internal_equivs[curie] = id_
        elif not nodes:
            # no equivalents at all: inspect the node itself
            node = sgg.getNode(curie)['nodes'][0]
            if OWL.deprecated.toPython() in node['meta']:
                print('THIS CLASS IS DEPRECATED', curie)
                lbl = node['lbl']
                if lbl.startswith(
                        'Predominantly white regional') or lbl.startswith(
                            'Predominantly gray regional'):
                    print('\tHE\'S DEAD JIM!', lbl, node['id'])
                    replaced_by[curie] = 'NOREP'
                if IRBC in node['meta']:
                    # follow an existing replacedBy annotation one hop further
                    existing_replaced = node['meta'][IRBC][0]
                    ec2 = sgg.getNeighbors(existing_replaced,
                                           relationshipType='equivalentClass')
                    print('\tFOUND ONE', existing_replaced)
                    #scigPrint.pprint_node(sgg.getNode(existing_replaced))
                    if ec2['edges']:  # pass the buck if we can
                        print('\t', end='')
                        scigPrint.pprint_edge(ec2['edges'][0])
                        rb = ec2['edges'][0]['obj']
                        print('\tPASSING BUCK : (%s -> %s -> %s)' %
                              (curie, existing_replaced, rb))
                        irbcs[curie] = (existing_replaced, rb)
                        replaced_by[curie] = rb
                        return nodes
                    else:
                        er_node = sgv.findById(existing_replaced)
                        if not er_node['deprecated']:
                            if not er_node['curie'].startswith('NIFGA:'):
                                print('\tPASSING BUCK : (%s -> %s)' %
                                      (curie, er_node['curie']))
                                return nodes
                        print(
                            '\tERROR: could not pass buck, we are at a dead end at',
                            er_node)  # TODO
                print()
            # last resort: search by label, keep only UBERON hits
            moar = [
                t for t in sgv.findByTerm(label)
                if t['curie'].startswith('UBERON')
            ]
            if moar:
                #print(moar)
                #replaced_by[curie] = moar[0]['curie']
                if len(moar) > 1:
                    print('WARNING', curie, label,
                          [(m['curie'], m['labels'][0]) for m in moar])
                for node in moar:
                    #if node['curie'] in uberon_obsolete:  # node['deprecated']?
                        #continue
                    ns = sgg.getNode(node['curie'])
                    assert len(
                        ns['nodes']) == 1, "WTF IS GOING ON %s" % node['curie']
                    ns = ns['nodes'][0]
                    if _doprint:
                        print(
                            'Found putative replacement in moar: (%s -> %s)' %
                            (curie, ns['id']))
                        if DBX in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][DBX],
                                  node['labels'][0], node['synonyms'])
                        if AID in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][AID],
                                  node['labels'][0], node['synonyms'])
                        if CON in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][CON],
                                  node['labels'][0], node['synonyms'])
                    # NOTE(review): inside the loop, so the last hit wins
                    replaced_by[curie] = ns['id']
            else:
                replaced_by[curie] = None
                if False:  # review
                    print('NO FORWARD EQUIV', tc.red(curie), label)  # TODO
                    for k, v in sorted(
                            sgg.getNode(curie)['nodes'][0]['meta'].items()):
                        if type(v) == iter:
                            print(' ' * 4, k)
                            for _ in v:
                                print(' ' * 8, _)
                        else:
                            print(' ' * 4, k, v)
        else:
            # exactly one equivalent class: the clean case
            node = nodes[0]
            replaced_by[curie] = node['id']
            exact[curie] = node['id']

        return nodes

    equivs = [equiv(c['curie'], c['labels'][0])
              for c in matches]  # async causes print issues :/

    return g, matches, exact, internal_equivs, irbcs, replaced_by
# Map every NeuroLex Id to a known curie (trying each prefix in turn) and
# dump the result to /tmp/total_curie_fragment.json.
# NOTE(review): top-level script code; `c`, `v` and `async_getter` are
# defined elsewhere in the file.
curies = c.getCuries()
curies.pop('')  # don't want NIFSTD uris just yet
with open(os.path.expanduser('~/git/nlxeol/neurolex_full.csv'), 'rt') as f:
    rows = [r for r in csv.reader(f)]

Id = rows[0].index('Id')
# keep non-empty, non-header, non-Resource rows; wrap each id in a 1-tuple
# because async_getter applies its function to argument tuples
ids = [(r[Id],) for r in rows
       if r[Id] and r[Id] != 'Id' and 'Resource:' not in r[0]]

items = tuple(curies.items())
findById = v.findById


def async_func(id_):
    # already-qualified ids are tried as-is first
    if ':' in id_:
        out = findById(id_)
        if out:
            return id_, out['curie']
    # otherwise try every known prefix until one resolves
    for prefix, uri in items:
        curie = prefix + ':' + id_
        out = findById(curie)
        if out:
            return id_, out['curie']
    return id_, 'NLXONLY'  # sentinel: id exists only in NeuroLex


id_curie = async_getter(async_func, ids)
j = {id_: curie for id_, curie in id_curie}
with open('/tmp/total_curie_fragment.json', 'wt') as f:
    json.dump(j, f, sort_keys=True, indent=4)
# Map every NeuroLex Id to a known curie and dump the mapping to
# /tmp/total_curie_fragment.json.
# NOTE(review): top-level script code; `curies`, `v` and `async_getter` are
# defined elsewhere (outside this span).
with open(os.path.expanduser('~/git/nlxeol/neurolex_full.csv'), 'rt') as f:
    rows = [r for r in csv.reader(f)]

Id = rows[0].index('Id')
# keep non-empty, non-header, non-Resource rows; wrap each id in a 1-tuple
# because async_getter applies its function to argument tuples
ids = [(r[Id], ) for r in rows
       if r[Id] and r[Id] != 'Id' and 'Resource:' not in r[0]]

items = tuple(curies.items())
findById = v.findById


def async_func(id_):
    # already-qualified ids are tried as-is first
    if ':' in id_:
        out = findById(id_)
        if out:
            return id_, out['curie']
    # otherwise try every known prefix until one resolves
    for prefix, uri in items:
        curie = prefix + ':' + id_
        out = findById(curie)
        if out:
            return id_, out['curie']
    return id_, 'NLXONLY'  # sentinel: id exists only in NeuroLex


id_curie = async_getter(async_func, ids)
j = {id_: curie for id_, curie in id_curie}
with open('/tmp/total_curie_fragment.json', 'wt') as f:
    json.dump(j, f, sort_keys=True, indent=4)
def swanson():
    """ not really a parcellation scheme

    Build the Swanson 2014 partonomy ontology from ``resources/swanson_aligned.txt``.

    Parses the aligned text into (depth, name, citation, next-syn) tuples,
    looks up candidate UBERON curies via ``sgv.findByTerm``, feeds the rows
    through a ``rowParse`` subclass to accumulate nodes, appendix metadata
    and parent/child edges, then writes classes, per-appendix part-of
    hierarchies and a node/edge json blob.

    :return: tuple of (ontology id, None).
    """
    ONT_PATH = 'http://ontology.neuinfo.org/NIF/ttl/generated/'
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = makePrefixes('ilx', 'owl', 'OBOANN', 'UBERON')
    PREFIXES.update({
        '':ontid + '/', # looking for better options
        'SWAN':'http://swanson.org/node/',
        'SWAA':'http://swanson.org/appendix/',
    })
    new_graph = makeGraph(filename, PREFIXES, writeloc='/tmp/parc/')
    new_graph.add_ont(ontid,
                      'Swanson brain partomies',
                      'Swanson 2014 Partonomies',
                      'This file is automatically generated from....',
                      TODAY)

    with open('resources/swanson_aligned.txt', 'rt') as f:
        lines = [l.strip() for l in f.readlines()]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    #fix for capitalization since this header is reused
    fixed = ' or '.join([' ('.join([n.capitalize() for n in _.split(' (')])
                         for _ in lines[635].lower().split(' or ')]).replace('human','HUMAN')
    lines[635] = fixed

    # Parse each non-comment line: depth is encoded by runs of five dots,
    # "name (citation)" carries the citation, and "A (c1) or B (c2)" lines
    # emit an extra row tagged 'NEXT SYN' so the following row is treated as
    # a synonym of it.
    data = []
    for l in lines:
        if not l.startswith('#'):
            level = l.count('.'*5)
            l = l.strip('.')
            if ' (' in l:
                if ') or' in l:
                    n1, l = l.split(') or')
                    area_name, citationP = n1.strip().split(' (')
                    citation = citationP.rstrip(')')
                    d = (level, area_name, citation, 'NEXT SYN')
                    data.append(d)
                    #print(tc.red(tc.bold(repr(d))))
                area_name, citationP = l.strip().split(' (')
                citation = citationP.rstrip(')')
            else:
                area_name = l
                citation = None
            d = (level, area_name, citation, None)
            #print(d)
            data.append(d)

    # Look up every area name; keep only curies containing 'UBERON',
    # first hit wins (None when nothing matched).
    results = async_getter(sgv.findByTerm, [(d[1],) for d in data])
    #results = [None] * len(data)
    curies = [[r['curie'] for r in _ if 'UBERON' in r['curie']] if _ else [] for _ in results]
    output = [_[0] if _ else None for _ in curies]
    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
        [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    class SP(rowParse):
        # NOTE(review): rowParse (defined elsewhere) appears to dispatch each
        # row's column values to the method named after the column header and
        # to expose the current row index as self._rowind — confirm.
        def __init__(self):
            self.nodes = defaultdict(dict)       # rowind -> node annotations
            self._appendix = 0                   # current appendix number
            self.appendicies = {}                # appendix number -> metadata
            self._last_at_level = {}             # depth -> rowind of last node seen
            self.names = defaultdict(set)        # "name citation" -> rowinds (dupe detection)
            self.children = defaultdict(set)     # parent rowind -> child rowinds
            self.parents = defaultdict(set)      # child rowind -> parent rowinds
            self.next_syn = False                # rowind whose synonym the next row is
            super().__init__(zoop)

        def Depth(self, value):
            # Depth is the first column, so per-row state is reset here.
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    # Close out the previous appendix before starting a new one.
                    if self._appendix:
                        self.appendicies[self._appendix]['children'] = dict(self.children)
                        self.appendicies[self._appendix]['parents'] = dict(self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':
                        # sentinel row appended to zoop; nothing more to record
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name':apname.capitalize(),
                        'type':self.citation.capitalize() if self.citation else None}
                    return
                else:
                    # depth-0 non-appendix rows carry the taxon in '[...]'
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix]['taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[self._appendix]['taxon'] = 'ANIMALIA'.capitalize()
                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                # this row is a synonym of an earlier row; annotate it and stop
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(self._rowind)
                else:
                    # disambiguate citation-less names with appendix + parent label
                    self.name += str(self._appendix) + self.nodes[self._last_at_level[self.depth - 1]]['label']
                #print(level, self.name)
                # can't return here because they are their own level
            # replace with actually doing something...
            self.nodes[self._rowind]['label'] = self.name
            self.nodes[self._rowind]['citation'] = self.citation
            self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except:
                    embed()
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            # Merge duplicate nodes: for every name+citation seen more than
            # once, keep the lowest rowind and redirect the rest to it.
            replace = {}
            for asdf in [sorted(n) for k,n in self.names.items() if len(n) > 1]:
                replace_with, to_replace = asdf[0], asdf[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r, rw in replace.items():
                #print(self.nodes[rw])
                o = self.nodes.pop(r)
                #print(o)

            # Rewrite edges in every appendix to point at the surviving ids.
            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    tp = [_ for _ in sorted(['{: <50}'.format(n['label']) + n['uberon'] if n['uberon'] else n['label']
                             for n in sp.nodes.values()])]
    #print('\n'.join(tp))
    #print(sp.appendicies[1].keys())
    #print(sp.nodes[1].keys())

    # Emit one class per surviving node.
    nbase = 'http://swanson.org/node/%s'
    json_ = {'nodes':[],'edges':[]}
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, 'ilx:swansonBrainRegionConcept', label=anns['label'])
        new_graph.add_node(nid, 'OBOANN:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl':anns['label'],'id':'SWA:' + str(node)})
        #if anns['uberon']:
            #new_graph.add_node(nid, rdflib.OWL.equivalentClass, anns['uberon'])
        # issues arrise here...

    # One hasPart/partOf property pair per appendix, plus the hierarchy edges.
    for appendix, data in sp.appendicies.items():
        aid = 'http://swanson.org/appendix/%s' % appendix
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_node(aid, 'ilx:hasTaxonRank', data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        ahp = HASPART + str(appendix)
        apo = PARTOF + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items():  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_hierarchy(cid, ahp, pid)  # note hierarhcy inverts direction
                new_graph.add_hierarchy(pid, apo, cid)
                json_['edges'].append({'sub':'SWA:' + str(child),'pred':apo,'obj':'SWA:' + str(parent)})

    new_graph.write(convert=False)
    if False:
        # disabled debugging: render one tree per appendix mapping
        Query = namedtuple('Query', ['root','relationshipType','direction','depth'])
        mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?
        for i, n in enumerate(mapping):
            a, b = creatTree(*Query('SWA:' + str(n), 'ilx:partOf' + str(i + 1), 'INCOMING', 10), json=json_)
            print(a)
    return ontid, None
def make_nifga_graph(_doprint=False):
    """Build a replacement mapping for NIFGA classes from equivalentClass edges.

    Loads the NIFGA ontology from ``nifga_path``, collects its class curies,
    and for each one asks the graph service (``sgg``, defined elsewhere) for
    equivalentClass neighbors, preferring UBERON ids as replacements and
    falling back to term search via ``sgv`` — presumably SciGraph clients;
    confirm. ``manual`` (defined elsewhere) overrides everything.

    :param _doprint: when True, print details for putative replacements.
    :return: tuple (graph, matches, exact, internal_equivs, irbcs, replaced_by).
    """
    # use equivalent class mappings to build a replacement mapping
    g = rdflib.Graph()
    g.parse(nifga_path, format='turtle')
    getQname = g.namespace_manager.qname
    classes = sorted([getQname(_) for _ in g.subjects(RDF.type, OWL.Class) if type(_) is URIRef])
    # qnames without a prefix belong to the NIFGA namespace
    curies = ['NIFGA:' + n for n in classes if ':' not in n]

    matches = async_getter(sgv.findById, [(c,) for c in curies])

    replaced_by = {}        # NIFGA curie -> replacement id / 'NOREP' / None
    exact = {}              # curies with exactly one equivalent class
    internal_equivs = {}    # non-UBERON (internal) equivalents
    irbcs = {}              # curie -> (existing replacedBy, buck-passed target)

    def equiv(curie, label):
        # Resolve one curie; mutates the enclosing dicts as a side effect and
        # returns the list of equivalentClass neighbor nodes.
        if curie in manual:
            # hand-curated override wins outright
            replaced_by[curie] = manual[curie]
            return manual[curie]

        ec = sgg.getNeighbors(curie, relationshipType='equivalentClass')
        nodes = [n for n in ec['nodes'] if n['id'] != curie]
        if len(nodes) > 1:
            # multiple equivalents: record UBERON ones as replacements,
            # anything else as an internal equivalence
            #print('wtf node', [n['id'] for n in nodes], curie)
            for node in nodes:
                id_ = node['id']
                label_ = node['lbl']
                if id_.startswith('UBERON'):
                    if curie in replaced_by:
                        one = replaced_by[curie]
                        replaced_by[curie] = one, id_
                        print('WE GOT DUPES', curie, label, one, id_)  # TODO
                    else:
                        replaced_by[curie] = id_
                else:
                    internal_equivs[curie] = id_
        elif not nodes:
            # no equivalents at all: inspect the node itself
            node = sgg.getNode(curie)['nodes'][0]
            if OWL.deprecated.toPython() in node['meta']:
                print('THIS CLASS IS DEPRECATED', curie)
                lbl = node['lbl']
                if lbl.startswith('Predominantly white regional') or lbl.startswith('Predominantly gray regional'):
                    print('\tHE\'S DEAD JIM!', lbl, node['id'])
                    replaced_by[curie] = 'NOREP'
                if IRBC in node['meta']:
                    # follow an existing replacedBy annotation one hop further
                    existing_replaced = node['meta'][IRBC][0]
                    ec2 = sgg.getNeighbors(existing_replaced, relationshipType='equivalentClass')
                    print('\tFOUND ONE', existing_replaced)
                    #scigPrint.pprint_node(sgg.getNode(existing_replaced))
                    if ec2['edges']:  # pass the buck if we can
                        print('\t',end='')
                        scigPrint.pprint_edge(ec2['edges'][0])
                        rb = ec2['edges'][0]['obj']
                        print('\tPASSING BUCK : (%s -> %s -> %s)' % (curie, existing_replaced, rb))
                        irbcs[curie] = (existing_replaced, rb)
                        replaced_by[curie] = rb
                        return nodes
                    else:
                        er_node = sgv.findById(existing_replaced)
                        if not er_node['deprecated']:
                            if not er_node['curie'].startswith('NIFGA:'):
                                print('\tPASSING BUCK : (%s -> %s)' % (curie, er_node['curie']))
                                return nodes
                        print('\tERROR: could not pass buck, we are at a dead end at', er_node)  # TODO
                print()
            # last resort: search by label, keep only UBERON hits
            moar = [t for t in sgv.findByTerm(label) if t['curie'].startswith('UBERON')]
            if moar:
                #print(moar)
                #replaced_by[curie] = moar[0]['curie']
                if len(moar) > 1:
                    print('WARNING', curie, label, [(m['curie'], m['labels'][0]) for m in moar])
                for node in moar:
                    #if node['curie'] in uberon_obsolete:  # node['deprecated']?
                        #continue
                    ns = sgg.getNode(node['curie'])
                    assert len(ns['nodes']) == 1, "WTF IS GOING ON %s" % node['curie']
                    ns = ns['nodes'][0]
                    if _doprint:
                        print('Found putative replacement in moar: (%s -> %s)' % (curie, ns['id']))
                        if DBX in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][DBX],
                                  node['labels'][0], node['synonyms'])
                        if AID in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][AID],
                                  node['labels'][0], node['synonyms'])
                        if CON in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][CON],
                                  node['labels'][0], node['synonyms'])
                    # NOTE(review): inside the loop, so the last hit wins
                    replaced_by[curie] = ns['id']
            else:
                replaced_by[curie] = None
                if False:  # review
                    print('NO FORWARD EQUIV', tc.red(curie), label)  # TODO
                    for k,v in sorted(sgg.getNode(curie)['nodes'][0]['meta'].items()):
                        if type(v) == iter:
                            print(' ' * 4, k)
                            for _ in v:
                                print(' ' * 8, _)
                        else:
                            print(' ' * 4, k, v)
        else:
            # exactly one equivalent class: the clean case
            node = nodes[0]
            replaced_by[curie] = node['id']
            exact[curie] = node['id']

        return nodes

    equivs = [equiv(c['curie'], c['labels'][0]) for c in matches]  # async causes print issues :/

    return g, matches, exact, internal_equivs, irbcs, replaced_by