示例#1
0
#!/usr/bin/env python3.5
import asyncio
import csv
import requests
from time import time
from os.path import expanduser
from IPython import embed
from pyontutils.scigraph_client import Vocabulary
from pyontutils.utils import chunk_list

# Vocabulary client from pyontutils.scigraph_client, built with its defaults.
v = Vocabulary()
# Sorted list of the truthy curie prefixes the client reports (70 currently).
curies = sorted(filter(None, v.getCuriePrefixes()))

async def fragment_getter(future_, fragments):
    """Run ``run_curies`` over every fragment and publish the results.

    Each fragment is handed to the event loop's default executor, so all of
    them are submitted before any result is awaited. Results are then awaited
    in submission order, printed together with their index, and finally set
    as the result of ``future_`` as a list.
    """
    event_loop = asyncio.get_event_loop()
    # Submit everything up front; awaiting happens afterwards, in order.
    pending = [event_loop.run_in_executor(None, run_curies, fragment)
               for fragment in fragments]

    collected = []
    for index, job in enumerate(pending):
        result = await job
        print(index, result)
        collected.append(result)

    future_.set_result(collected)

def check_response(puta_curie):
    out = v.findById(puta_curie)
    if out:
示例#2
0
#!/usr/bin/env python3

import csv
from os.path import expanduser
import rdflib
from pyontutils.scigraph_client import Vocabulary, Graph
from IPython import embed

# IRI of the oboInOwl hasDbXref predicate.
dbx = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref'

# Whitespace-stripped lines of the uberon-nervous mapping file, deduplicated.
with open(expanduser('~/git/entity_mapping/mappings/uberon-nervous'),
          'rt') as f:
    brain_only = {line.strip() for line in f}

# Both clients point at the same local SciGraph base path.
v = Vocabulary('http://localhost:9000/scigraph')
sg = Graph('http://localhost:9000/scigraph')

# Load the cocomac slim ontology and collect every (subject, label) pair.
g = rdflib.Graph()
g.parse(expanduser('~/git/NIF-Ontology/ttl/generated/cocomacslim.ttl'),
        format='turtle')
sos = list(g.subject_objects(rdflib.RDFS.label))

# Empty accumulators for the code that follows.
map_, smap_ = [], []
fma_lookup = {}
for s, o in sos:
    cc_id = g.qname(s)
    cc_label = o.toPython()
    existing_id = None
    existing_label = None
    existing_fma = ''
示例#3
0
#!/usr/bin/env python3

import csv
from os.path import expanduser
import rdflib
from pyontutils.scigraph_client import Vocabulary, Graph
from IPython import embed

# IRI of the oboInOwl hasDbXref predicate.
dbx = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref'

# Each line of the mapping file, stripped of surrounding whitespace, as a set.
with open(expanduser('~/git/entity_mapping/mappings/uberon-nervous'), 'rt') as f:
    brain_only = set(map(str.strip, f))

# Vocabulary and Graph clients for the local SciGraph base path.
v = Vocabulary('http://localhost:9000/scigraph')
sg = Graph('http://localhost:9000/scigraph')

# Parse the cocomac slim turtle file and gather all (subject, rdfs:label) pairs.
g = rdflib.Graph()
g.parse(expanduser('~/git/NIF-Ontology/ttl/generated/cocomacslim.ttl'), format='turtle')
sos = list(g.subject_objects(rdflib.RDFS.label))

# Empty accumulators for the code that follows.
map_, smap_ = [], []
fma_lookup = {}
for s, o in sos:
    cc_id = g.qname(s)
    cc_label = o.toPython()
    existing_id = None
    existing_label = None
    existing_fma = ''
    s_existing_id = None
    s_existing_label = None
from hashlib import md5
from functools import wraps
import robobrowser
import rdflib
from pyontutils.utils import makePrefixes, makeGraph
from pyontutils.scigraph_client import Vocabulary

# ilx api implementation (will change)
import csv
import requests
from io import StringIO

#debug
from IPython import embed

# Module-level Vocabulary client, constructed with its default arguments.
sgv = Vocabulary()

# SciCrunch login stuff.
# The credential lookup and the session-cookie call are commented out, so
# SESS_COOKIE is always None here.
#SC_EM = os.environ.get('SC_EM', None)
#SC_PASS = os.environ.get('SC_PASS', None)
SESS_COOKIE = None  # getSessionCookie(SC_EM, SC_PASS)

# InterLex API (temporary version).
# NOTE(review): ILX_SERVER and ILX_ENDPOINT look like they are meant to be
# concatenated into the bulk-upload URL -- confirm at the call site.
ILX_SERVER = 'https://beta.scicrunch.org/'
#ILX_SERVER = 'https://scicrunch.org/'
ILX_ENDPOINT = 'forms/term-forms/term-bulk-upload.php'  # test.scicrunch.org will create real records
CID = 72  # SciCrunch community id


def makeIlxRec(label,
               definition='',
示例#5
0
    -r --reup                   redownload the data to be mapped to a local copy
"""
import csv
import json
import asyncio
from os import path
from collections import namedtuple
from datetime import datetime
import requests
from docopt import docopt
from IPython import embed
from heatmaps.services import database_service
from pyontutils.scigraph_client import Refine, Vocabulary
from exclude import exclude_table_prefixes, exclude_tables, exclude_columns

# Vocabulary client built with default arguments; the trailing comment keeps
# previously tried arguments (a localhost base path, quiet=False) for reference.
v = Vocabulary()#'http://localhost:9000/scigraph')#quiet=False)

# Hard-coded, machine-specific directory.
# NOTE(review): presumably where memoized data is read/written -- confirm.
MEMOIZE_PATH = '/home/tom/files/entity_mapping/'  # XXX set this to match your system

# database_service subclass that only overrides class-level settings for the
# 'disco_crawler' database on nif-db.crbs.ucsd.edu.
# NOTE(review): presumably database_service reads these attributes when it
# opens its connection -- confirm against heatmaps.services.
class discodv(database_service):
    dbname = 'disco_crawler'
    user = '******'  # NOTE(review): looks like a masked placeholder, not a real user name -- confirm
    host = 'nif-db.crbs.ucsd.edu'
    port = 5432
    DEBUG = True

# Ordered tuple of field names.
# NOTE(review): presumably the column order of the mapping csv -- confirm
# against the code that reads/writes it.
csv_schema = (
    'source', 'table', 'column', 'value',  # loop variables
    'input_value', 'candidate', 'identifier', 'category', 'relation',
    'prov', 'eid', 'ms', 'notes',  # eid => existing id
)
示例#6
0
    def configGraphIO(remote_base,
                      local_base,
                      branch,
                      core_graph_paths=tuple(),
                      core_graph=None,
                      in_graph_paths=tuple(),
                      out_graph_path=None,
                      out_imports=tuple(),
                      out_graph=None,
                      force_remote=False,
                      scigraph=None):
        """ We set this up to work this way because we can't
            instantiate graphBase, it is a super class that needs
            to be configurable and it needs to do so globally.
            All the default values here are examples and not real.
            You should write a local `def config` function as part
            of your local setup that replicates the arguments of
            configGraphIO.

            Example:
            def config(remote_base=       'http://someurl.org/remote/ontology/',
                       local_base=        '/home/user/git/ontology/',
                       branch=            'master',
                       core_graph_paths= ['local/path/localCore.ttl',
                                          'local/path/localClasses.ttl'],
                       core_graph=        None,
                       in_graph_paths=    tuple(),
                       out_graph_path=    '/tmp/outputGraph.ttl',
                       out_imports=      ['local/path/localCore.ttl'],
                       out_graph=         None,
                       force_remote=      False,
                       scigraph=          'scigraph.mydomain.org:9000'):
                graphBase.configGraphIO(remote_base, local_base, branch,
                                        core_graph_paths, core_graph,
                                        in_graph_paths,
                                        out_graph_path, out_imports, out_graph,
                                        force_remote, scigraph)

            Arguments:
            remote_base       prefix for remote paths; a remote path is
                              remote_base + branch + '/' + suffix
            local_base        prefix for local paths, also opened as a git
                              repo to check the active branch
            branch            branch the local repo must be on and the
                              branch component of remote paths
            core_graph_paths  suffixes of turtle files parsed into core_graph
            core_graph        graph to parse into, a new rdflib.Graph if None
            in_graph_paths    suffixes of turtle files parsed into the input
                              graph (the same object as core_graph)
            out_graph_path    filename set on the output makeGraph and used
                              to build its 'file://' ontology id
            out_imports       suffixes whose remote paths are added as
                              owl:imports of the output ontology
            out_graph         graph to write to; when None a new rdflib.Graph
                              is made and the NeuronBase caches are reset
            force_remote      always use remote paths, skip the local check
            scigraph          host[:port] used as
                              'http://' + scigraph + '/scigraph', or None to
                              use the Vocabulary default

            Sets graphBase.core_graph, .in_graph, .out_graph, .ng,
            ._predicates and ._sgv.

            Raises FileNotFoundError when local_base exists (and
            force_remote is False) but its repo is not on `branch`.
        """
        def makeLocalRemote(suffixes):
            """ Return the (local, remote) path lists for suffixes. """
            local = [local_base + s for s in suffixes]
            remote = [remote_base + branch + '/' + s for s in suffixes]
            return local, remote

        def attachPrefixes(*prefixes, graph=None):
            """ Wrap graph in a makeGraph built with the named prefixes. """
            return makeGraph('', prefixes=makePrefixes(*prefixes), graph=graph)

        # file location setup
        # makeLocalRemote returns (local, remote) in that order, unpacking
        # the other way around silently swaps local files and remote urls
        local_core_paths, remote_core_paths = makeLocalRemote(core_graph_paths)
        local_in_paths, remote_in_paths = makeLocalRemote(in_graph_paths)
        local_out_imports, remote_out_imports = makeLocalRemote(out_imports)

        out_graph_paths = [out_graph_path]
        local_out_paths, remote_out_paths = makeLocalRemote(
            out_graph_paths)  # XXX fail w/ tmp
        remote_out_paths = local_out_paths  # can't write to a remote server without magic

        if not force_remote and os.path.exists(local_base):
            repo = Repo(local_base)
            if repo.active_branch.name != branch:
                raise FileNotFoundError(
                    'Local git repo not on %s branch! Please run `git checkout %s` in %s'
                    % (branch, branch, local_base))
            use_core_paths = local_core_paths
            use_in_paths = local_in_paths
        else:
            if not force_remote:
                print("Warning local ontology path '%s' not found!" %
                      local_base)
            use_core_paths = remote_core_paths
            use_in_paths = remote_in_paths

        # core graph setup
        if core_graph is None:
            core_graph = rdflib.Graph()
        for cg in use_core_paths:
            core_graph.parse(cg, format='turtle')
        graphBase.core_graph = core_graph

        # input graph setup
        # in_graph is the same object as core_graph, so the input files
        # are parsed into the core graph as well
        in_graph = core_graph
        for ig in use_in_paths:
            in_graph.parse(ig, format='turtle')
        # the returned wrapper is not used afterwards; the call is kept
        # because makeGraph is handed in_graph itself
        attachPrefixes('ILXREPLACE', 'GO', 'CHEBI', graph=in_graph)
        graphBase.in_graph = in_graph

        # output graph setup
        if out_graph is None:
            out_graph = rdflib.Graph()
            # in this case we also want to wipe any existing python Neuron entries
            # that we use to serialize so that behavior is consistent
            NeuronBase.existing_pes = {}
            NeuronBase.existing_ids = {}
        new_graph = attachPrefixes('owl',
                                   'GO',
                                   'PR',
                                   'CHEBI',
                                   'UBERON',
                                   'NCBITaxon',
                                   'ILXREPLACE',
                                   'ilx',
                                   'ILX',
                                   'NIFCELL',
                                   'NIFMOL',
                                   graph=out_graph)
        graphBase.out_graph = out_graph

        # makeGraph setup
        new_graph.filename = out_graph_path
        ontid = rdflib.URIRef('file://' + out_graph_path)
        new_graph.add_ont(ontid, 'Some Neurons')
        for remote_out_import in remote_out_imports:
            new_graph.add_node(
                ontid, 'owl:imports', rdflib.URIRef(
                    remote_out_import))  # core should be in the import closure
        graphBase.ng = new_graph

        # set predicates
        graphBase._predicates = getPhenotypePredicates(graphBase.core_graph)

        # scigraph setup
        if scigraph is not None:
            graphBase._sgv = Vocabulary(cache=True,
                                        basePath='http://' + scigraph +
                                        '/scigraph')
        else:
            graphBase._sgv = Vocabulary(cache=True)