#!/usr/bin/env python3.5 import asyncio import csv import requests from time import time from os.path import expanduser from IPython import embed from pyontutils.scigraph_client import Vocabulary from pyontutils.utils import chunk_list v = Vocabulary() curies = sorted([c for c in v.getCuriePrefixes() if c]) # 70 currently async def fragment_getter(future_, fragments): loop = asyncio.get_event_loop() futures = [] for fragment in fragments: #print(fragment) future = loop.run_in_executor(None, run_curies, fragment) futures.append((future)) mapping = [] for i, f in enumerate(futures): out = await f print(i, out) mapping.append(out) future_.set_result(mapping) def check_response(puta_curie): out = v.findById(puta_curie) if out:
#!/usr/bin/env python3 import csv from os.path import expanduser import rdflib from pyontutils.scigraph_client import Vocabulary, Graph from IPython import embed dbx = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref' with open(expanduser('~/git/entity_mapping/mappings/uberon-nervous'), 'rt') as f: brain_only = set([l.strip() for l in f.readlines()]) v = Vocabulary('http://localhost:9000/scigraph') sg = Graph('http://localhost:9000/scigraph') g = rdflib.Graph() g.parse(expanduser('~/git/NIF-Ontology/ttl/generated/cocomacslim.ttl'), format='turtle') sos = [so for so in g.subject_objects(rdflib.RDFS.label)] map_ = [] smap_ = [] fma_lookup = {} for s, o in sos: cc_id = g.qname(s) cc_label = o.toPython() existing_id = None existing_label = None existing_fma = ''
#!/usr/bin/env python3 import csv from os.path import expanduser import rdflib from pyontutils.scigraph_client import Vocabulary, Graph from IPython import embed dbx = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref' with open(expanduser('~/git/entity_mapping/mappings/uberon-nervous'), 'rt') as f: brain_only = set([l.strip() for l in f.readlines()]) v = Vocabulary('http://localhost:9000/scigraph') sg = Graph('http://localhost:9000/scigraph') g = rdflib.Graph() g.parse(expanduser('~/git/NIF-Ontology/ttl/generated/cocomacslim.ttl'), format='turtle') sos = [so for so in g.subject_objects(rdflib.RDFS.label)] map_ = [] smap_ = [] fma_lookup = {} for s, o in sos: cc_id = g.qname(s) cc_label = o.toPython() existing_id = None existing_label = None existing_fma = '' s_existing_id = None s_existing_label = None
# NOTE(review): whitespace-mangled chunk; line structure reconstructed,
# tokens unchanged. Truncated in the middle of makeIlxRec's signature.
from hashlib import md5
from functools import wraps
import robobrowser
import rdflib
from pyontutils.utils import makePrefixes, makeGraph
from pyontutils.scigraph_client import Vocabulary
# ilx api implementation (will change)
import csv
import requests
from io import StringIO
#debug
from IPython import embed

sgv = Vocabulary()

# scicrunch login stuff
#SC_EM = os.environ.get('SC_EM', None)
#SC_PASS = os.environ.get('SC_PASS', None)
SESS_COOKIE = None  # getSessionCookie(SC_EM, SC_PASS)

# interlex api (temp version)
ILX_SERVER = 'https://beta.scicrunch.org/'
#ILX_SERVER = 'https://scicrunch.org/'
ILX_ENDPOINT = 'forms/term-forms/term-bulk-upload.php'
# test.scicrunch.org will create real records
CID = 72  # SciCrunch community id

# NOTE(review): the source chunk ends mid-signature below -- the remaining
# parameters and the entire body of makeIlxRec are missing from this view.
def makeIlxRec(label, definition='',
-r --reup redownload the data to be mapped to a local copy
"""
# NOTE(review): whitespace-mangled chunk; line structure reconstructed,
# tokens unchanged. The chunk begins mid-way through the module docstring
# (docopt usage text) -- its opening quotes are outside this view.
import csv
import json
import asyncio
from os import path
from collections import namedtuple
from datetime import datetime

import requests
from docopt import docopt
from IPython import embed
from heatmaps.services import database_service
from pyontutils.scigraph_client import Refine, Vocabulary
from exclude import exclude_table_prefixes, exclude_tables, exclude_columns

v = Vocabulary()#'http://localhost:9000/scigraph')#quiet=False)

MEMOIZE_PATH = '/home/tom/files/entity_mapping/'  # XXX set this to match your system

class discodv(database_service):
    # connection parameters for the disco crawler database
    dbname = 'disco_crawler'
    user = '******'
    host = 'nif-db.crbs.ucsd.edu'
    port = 5432

# NOTE(review): indentation was lost in this chunk; DEBUG may originally
# have been a class attribute of discodv rather than module level -- confirm.
DEBUG = True

# column layout for the mapping output rows
csv_schema = (
    'source', 'table', 'column', 'value',  # loop variables
    'input_value', 'candidate', 'identifier', 'category',
    'relation', 'prov', 'eid', 'ms', 'notes',  # eid => existing id,
)
# NOTE(review): whitespace-mangled chunk; line structure reconstructed,
# tokens unchanged (the FileNotFoundError message was split across the two
# mangled source lines and is rejoined here).
def configGraphIO(remote_base, local_base, branch,
                  core_graph_paths=tuple(), core_graph=None,
                  in_graph_paths=tuple(),
                  out_graph_path=None, out_imports=tuple(), out_graph=None,
                  force_remote=False, scigraph=None):
    """ We set this up to work this way because we can't instantiate
        graphBase, it is a super class that needs to be configurable and
        it needs to do so globally. All the default values here are
        examples and not real. You should write a local `def config`
        function as part of your local setup that replicates that
        arguments of configureGraphIO.

        Example:
        def config(remote_base=      'http://someurl.org/remote/ontology/',
                   local_base=       '/home/user/git/ontology/',
                   branch=           'master',
                   core_graph_paths= ['local/path/localCore.ttl',
                                      'local/path/localClasses.ttl'],
                   core_graph=       None,
                   in_graph_paths=   tuple(),
                   out_graph_path=   '/tmp/outputGraph.ttl',
                   out_imports=      ['local/path/localCore.ttl'],
                   out_graph=        None,
                   force_remote=     False,
                   scigraph=         'scigraph.mydomain.org:9000'):
            graphBase.configGraphIO(remote_base, local_base, branch,
                                    core_graph_paths, core_graph,
                                    in_graph_paths,
                                    out_graph_path, out_imports, out_graph,
                                    force_remote, scigraph)
    """
    def makeLocalRemote(suffixes):
        # build (local filesystem path, remote raw URL) lists for suffixes
        local = [local_base + s for s in suffixes]
        remote = [remote_base + branch + '/' + s for s in suffixes]
        return local, remote

    def attachPrefixes(*prefixes, graph=None):
        # wrap graph in a makeGraph with the requested curie prefixes bound
        return makeGraph('', prefixes=makePrefixes(*prefixes), graph=graph)

    # file location setup
    # NOTE(review): makeLocalRemote returns (local, remote) but every caller
    # below unpacks as (remote_*, local_*), which inverts the local/remote
    # path selection further down -- looks like a real bug; confirm intent
    # before relying on the remote/local fallback behavior.
    remote_core_paths, local_core_paths = makeLocalRemote(core_graph_paths)
    remote_in_paths, local_in_paths = makeLocalRemote(in_graph_paths)
    remote_out_imports, local_out_imports = makeLocalRemote(out_imports)
    out_graph_paths = [out_graph_path]
    remote_out_paths, local_out_paths = makeLocalRemote(out_graph_paths)  # XXX fail w/ tmp
    remote_out_paths = local_out_paths  # can't write to a remote server without magic

    if not force_remote and os.path.exists(local_base):
        repo = Repo(local_base)
        if repo.active_branch.name != branch:
            raise FileNotFoundError('Local git repo not on %s branch! Please run `git checkout %s` in %s' % (branch, branch, local_base))
        use_core_paths = local_core_paths
        use_in_paths = local_in_paths
    else:
        if not force_remote:
            print("Warning local ontology path '%s' not found!" % local_base)
        use_core_paths = remote_core_paths
        use_in_paths = remote_in_paths

    # core graph setup
    if core_graph is None:
        core_graph = rdflib.Graph()
    for cg in use_core_paths:
        core_graph.parse(cg, format='turtle')
    graphBase.core_graph = core_graph

    # input graph setup (shares the core graph object; in-paths parse into it)
    in_graph = core_graph
    for ig in use_in_paths:
        in_graph.parse(ig, format='turtle')
    # NOTE(review): nin_graph is assigned but not used within this view
    nin_graph = attachPrefixes('ILXREPLACE', 'GO', 'CHEBI', graph=in_graph)
    graphBase.in_graph = in_graph

    # output graph setup
    if out_graph is None:
        out_graph = rdflib.Graph()
        # in this case we also want to wipe any existing python Neuron
        # entries that we use to serialize so that behavior is consistent
        NeuronBase.existing_pes = {}
        NeuronBase.existing_ids = {}
    new_graph = attachPrefixes('owl', 'GO', 'PR', 'CHEBI', 'UBERON',
                               'NCBITaxon', 'ILXREPLACE', 'ilx', 'ILX',
                               'NIFCELL', 'NIFMOL',
                               graph=out_graph)
    graphBase.out_graph = out_graph

    # makeGraph setup
    new_graph.filename = out_graph_path
    ontid = rdflib.URIRef('file://' + out_graph_path)
    new_graph.add_ont(ontid, 'Some Neurons')
    for remote_out_import in remote_out_imports:
        new_graph.add_node(ontid, 'owl:imports', rdflib.URIRef(remote_out_import))  # core should be in the import closure
    graphBase.ng = new_graph

    # set predicates
    graphBase._predicates = getPhenotypePredicates(graphBase.core_graph)

    # scigraph setup: point the cached Vocabulary at the given host:port,
    # otherwise fall back to the client's default base path
    if scigraph is not None:
        graphBase._sgv = Vocabulary(cache=True, basePath='http://' + scigraph + '/scigraph')
    else:
        graphBase._sgv = Vocabulary(cache=True)