def get_scigraph_nodes(id_list) -> Iterator[Dict]:
    """
    Query scigraph neighbors to get a list of nodes back.

    We use the scigraph neighbors function because ids can be sent in batch,
    which is faster than iteratively querying solr search
    or the scigraph graph/id function.

    :param id_list: iterable of ids to look up. It is copied into a list once,
                    so generators and other non-subscriptable iterables are
                    accepted as well as lists.
    :return: yields each entry of the ``'nodes'`` list found in the json
             decoded result from scigraph_ontology._neighbors_graph
    :raises ValueError: if a response cannot be JSON-decoded; this is assumed
                        to mean an id is not in scigraph
    """
    # Materialise once: the input may be a one-shot iterator, and we need
    # both its length and the ability to slice it.
    ids = list(id_list)
    if not ids:
        # Nothing to query, so do not bother building a remote client.
        return

    batch_size = 100  # number of ids sent per neighbors request
    scigraph = RemoteScigraphOntology('scigraph:data')

    for start in range(0, len(ids), batch_size):
        chunk = ids[start:start + batch_size]
        try:
            # depth=0 is passed so that only the requested nodes are asked for
            result_graph = scigraph._neighbors_graph(id=chunk, depth=0)
        except JSONDecodeError as exception:
            # Assume json decode is due to an incorrect class ID
            # Should we handle this?
            raise ValueError(exception.doc) from exception
        yield from result_graph['nodes']
def _load_obograph_via_owltools(source, fn):
    """
    Return the obograph stored at ``fn``, generating the file first if needed.

    If ``fn`` does not exist, ``owltools`` is run to convert ``source`` into
    a json file at ``fn``; otherwise the existing file is reused as a cache.

    :param source: URL handed to owltools as its input
    :param fn: path of the json file to create or reuse
    :return: result of ``obograph_util.convert_json_file(fn)``
    :raises subprocess.CalledProcessError: if owltools exits with an error
    """
    if not os.path.isfile(fn):
        cmd = ['owltools', source, '-o', '-f', 'json', fn]
        cp = subprocess.run(cmd, check=True)
        logging.info(cp)
    else:
        logging.info("using cached file: "+fn)
    return obograph_util.convert_json_file(fn)


def create_ontology(handle=None, **args):
    """
    Create an ontology object, choosing a loading strategy from ``handle``.

    Strategies, checked in this order:

    - ``a+b+...``   : each part is loaded recursively; the last ontology is
                      kept and the others are merged into it
    - existing file whose name has a ``.`` after its first character:
                      loaded with ``translate_file_to_ontology``
    - ``obo:``      : converted from the OBO PURL with owltools (cached)
    - ``wdq:``      : ``EagerWikidataOntology``
    - ``scigraph:`` : ``RemoteScigraphOntology``
    - ``http:``     : converted from the URL with owltools (cached)
    - anything else : ``EagerRemoteSparqlOntology``

    :param handle: string identifying the ontology to load (required, despite
                   the ``None`` default kept for backward compatibility)
    :param args: extra keyword arguments; these are only forwarded to
                 ``translate_file_to_ontology`` (the local-file strategy)
    :return: the ontology object produced by the selected strategy
    :raises ValueError: if ``handle`` is None
    """
    if handle is None:
        # Fail with a clear message instead of an AttributeError on None.find
        raise ValueError("An ontology handle is required, got None")

    logging.info("Determining strategy to load '{}' into memory...".format(handle))

    if "+" in handle:
        # NOTE(review): **args is not passed on to the recursive calls;
        # confirm whether that is intended.
        onts = [create_ontology(part) for part in handle.split("+")]
        ont = onts.pop()
        ont.merge(onts)
        return ont

    # TODO: consider replacing with plugin architecture
    if handle.find(".") > 0 and os.path.isfile(handle):
        logging.info("Fetching obograph-json file from filesystem")
        return translate_file_to_ontology(handle, **args)

    if handle.startswith("obo:"):
        logging.info("Fetching from OBO PURL")
        if "." not in handle:
            handle += '.owl'
        # NOTE(review): cache files live under a fixed /tmp/ path.
        fn = '/tmp/'+handle
        url = handle.replace("obo:", "http://purl.obolibrary.org/obo/")
        g = _load_obograph_via_owltools(url, fn)
        return Ontology(handle=handle, payload=g)

    if handle.startswith("wdq:"):
        from ontobio.sparql.wikidata_ontology import EagerWikidataOntology
        logging.info("Fetching from Wikidata")
        return EagerWikidataOntology(handle=handle)

    if handle.startswith("scigraph:"):
        from ontobio.neo.scigraph_ontology import RemoteScigraphOntology
        logging.info("Fetching from SciGraph")
        return RemoteScigraphOntology(handle=handle)

    if handle.startswith("http:"):
        # NOTE(review): "https:" handles do not match here and fall through
        # to the SPARQL strategy below.
        logging.info("Fetching from Web PURL: "+handle)
        # The URL is hashed so that it can be used as a cache file name.
        encoded = hashlib.sha256(handle.encode()).hexdigest()
        logging.info(" encoded: "+str(encoded))
        fn = '/tmp/'+encoded
        g = _load_obograph_via_owltools(handle, fn)
        return Ontology(handle=handle, payload=g)

    logging.info("Fetching from SPARQL")
    return EagerRemoteSparqlOntology(handle=handle)