示例#1
0
def get_scigraph_nodes(id_list) -> Iterator[Dict]:
    """
    Yield the scigraph node records for a collection of ids.

    We use the scigraph neighbors function (with ``depth=0``) because ids
    can be sent in batch, which is faster than iteratively querying solr
    search or the scigraph graph/id function. Ids are sent 100 at a time.

    :param id_list: ids to look up; any iterable is accepted (it is copied
        into a list once so it can be measured and sliced into batches)
    :return: generator over the entries of the ``'nodes'`` list of each
        json decoded result from scigraph_ontology._neighbors_graph
    :raises ValueError: if a response cannot be decoded as JSON, which is
        assumed to mean that an id is not in scigraph
    """
    batch_size = 100

    # Copy once: generators would be exhausted by a separate len() pass and
    # sets cannot be sliced, so batching is done on this list instead.
    ids = list(id_list)
    if not ids:
        # Nothing to look up, so no client needs to be created.
        return

    scigraph = RemoteScigraphOntology('scigraph:data')

    for start in range(0, len(ids), batch_size):
        params = {'id': ids[start:start + batch_size], 'depth': 0}

        # Guard only the remote call; errors raised while the caller is
        # consuming yielded nodes must not be reinterpreted here.
        try:
            result_graph = scigraph._neighbors_graph(**params)
        except JSONDecodeError as exception:
            # Assume json decode is due to an incorrect class ID
            raise ValueError(exception.doc) from exception

        yield from result_graph['nodes']
示例#2
0
def _load_via_owltools_cache(handle, source, cache_path):
    """
    Build an Ontology from an obograph-json file, creating the file if needed.

    If ``cache_path`` is not an existing file, ``owltools`` is run to convert
    ``source`` to JSON at that path; otherwise the existing file is reused.

    :param handle: handle recorded on the returned Ontology
    :param source: URL passed to owltools as the input ontology
    :param cache_path: location of the converted JSON file
    :return: Ontology whose payload is the converted obograph
    :raises subprocess.CalledProcessError: if owltools exits with a non-zero
        status (the command is run with ``check=True``)
    """
    if not os.path.isfile(cache_path):
        cmd = ['owltools', source, '-o', '-f', 'json', cache_path]
        cp = subprocess.run(cmd, check=True)
        logging.info(cp)
    else:
        logging.info("using cached file: %s", cache_path)
    g = obograph_util.convert_json_file(cache_path)
    return Ontology(handle=handle, payload=g)


def create_ontology(handle=None, **args):
    """
    Create an ontology object, choosing the loading strategy from ``handle``.

    Strategies are tried in this order:

    * handle contains ``+``: it is split on ``+``, every part is loaded
      recursively (with the same ``args``), and all others are merged into
      the ontology built from the last part
    * handle contains a ``.`` after its first character and names an
      existing file: ``translate_file_to_ontology(handle, **args)``
    * ``obo:`` prefix: fetched from the OBO PURL via owltools (``.owl`` is
      appended when the handle has no ``.``), cached under ``/tmp/``
    * ``wdq:`` prefix: EagerWikidataOntology
    * ``scigraph:`` prefix: RemoteScigraphOntology
    * ``http:`` or ``https:`` prefix: fetched via owltools, cached under
      ``/tmp/`` using the SHA-256 hex digest of the handle as file name
    * anything else: EagerRemoteSparqlOntology

    :param handle: string identifying the ontology to load
    :param args: extra keyword arguments, forwarded to
        translate_file_to_ontology for file handles
    :return: the ontology object produced by the selected strategy
    :raises ValueError: if ``handle`` is None
    """
    if handle is None:
        # Every strategy below inspects the handle as a string; fail with a
        # clear message instead of an AttributeError from None.find().
        raise ValueError("create_ontology requires an ontology handle")

    logging.info("Determining strategy to load '%s' into memory...", handle)

    if "+" in handle:
        # Forward args so that file-backed parts are loaded exactly as they
        # would be when requested on their own.
        onts = [create_ontology(part, **args) for part in handle.split("+")]
        ont = onts.pop()
        ont.merge(onts)
        return ont

    # TODO: consider replacing with plugin architecture
    if handle.find(".") > 0 and os.path.isfile(handle):
        logging.info("Fetching obograph-json file from filesystem")
        return translate_file_to_ontology(handle, **args)

    if handle.startswith("obo:"):
        logging.info("Fetching from OBO PURL")
        if "." not in handle:
            handle += '.owl'
        url = handle.replace("obo:", "http://purl.obolibrary.org/obo/")
        return _load_via_owltools_cache(handle, url, '/tmp/' + handle)

    if handle.startswith("wdq:"):
        from ontobio.sparql.wikidata_ontology import EagerWikidataOntology
        logging.info("Fetching from Wikidata")
        return EagerWikidataOntology(handle=handle)

    if handle.startswith("scigraph:"):
        from ontobio.neo.scigraph_ontology import RemoteScigraphOntology
        logging.info("Fetching from SciGraph")
        return RemoteScigraphOntology(handle=handle)

    if handle.startswith(("http:", "https:")):
        logging.info("Fetching from Web PURL: %s", handle)
        # A digest of the URL gives a file name free of path separators.
        encoded = hashlib.sha256(handle.encode()).hexdigest()
        logging.info(" encoded: %s", encoded)
        return _load_via_owltools_cache(handle, handle, '/tmp/' + encoded)

    logging.info("Fetching from SPARQL")
    return EagerRemoteSparqlOntology(handle=handle)