Example #1
def get_config(self):
    result = {}
    try:
        result = self.context.config.get_service(self.name)
    except Exception:
        logger = LoggingUtil.init_logging(__name__)
        logger.warning(f"Unable to get config for service: {self.name}")
    return result
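
# Hedged usage sketch (names are illustrative, not from the original code):
# because get_config falls back to an empty dict, callers can chain .get()
# without guarding against None.
# config = service.get_config()
# timeout = config.get("timeout", 30)   # safe even when the lookup failed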
Example #2
import gensim.models
import json
import logging
import os
import requests
import time
from greent.service import Service
from greent.service import ServiceContext
from greent.util import LoggingUtil
from pprint import pformat

logger = LoggingUtil.init_logging(__name__, level=logging.DEBUG)

class Chemotext2 (Service):
    """ RENCI used the word2vec machine learning algorithm to compute teh semantic similarity of terms in the public access
    subset of the PubMed Central full text journal article corpus. Among other things, word2vec mdoels let us interrogate
    the "distance" between two terms. For a full explanation of the significance of distance, see the literature on word 
    embedding models and the word2vec algorithm. For our purposes, it's a sophisticated view of cooccurrence. """
    def __init__(self, context): #url="https://www.ebi.ac.uk/spot/oxo/api/search?size=500"):
        super(Chemotext2, self).__init__("chemotext2", context)
        logger.debug ("Ensuring presence of word to vec pubmed central models: {0}".format (self.url))
        files = [
            "pmc-2016.w2v", "pmc-2016.w2v.syn0.npy", "pmc-2016.w2v.syn1neg.npy",
            "bigram-2016.w2v", "bigram-2016.w2v.syn0.npy", "bigram-2016.w2v.syn1neg.npy"
        ]
        for f in files:
            if os.path.exists (f):
                continue
            logger.debug ("  --downloading word embedding model component: {0}".format (f))
            url = "{0}/{1}".format (self.url, f)
            r = requests.get (url, stream=True)
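
# A hedged, self-contained sketch of what the downloaded models enable,
# assuming pmc-2016.w2v was saved with gensim's Word2Vec.save(); the query
# terms are illustrative and may not be in the vocabulary.
import gensim.models

model = gensim.models.Word2Vec.load("pmc-2016.w2v")
try:
    # cosine similarity in embedding space: the "distance" the docstring describes
    print(model.wv.similarity("aspirin", "ibuprofen"))
except KeyError:
    print("term not found in the model vocabulary")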
Example #3
import json
import os
import sys
from collections import defaultdict
from pprint import pformat
from pprint import pprint
from greent.client import GraphQL
from collections import namedtuple
from flask_testing import LiveServerTestCase
from greent.util import LoggingUtil
import networkx as nx
import networkx.algorithms as nxa

logger = LoggingUtil.init_logging(__file__)


class Vocab(object):
    root_kind = 'http://identifiers.org/doi'

    # MESH
    mesh = 'http://identifiers.org/mesh'
    mesh_disease_name = 'http://identifiers.org/mesh/disease/name'
    mesh_drug_name = 'http://identifiers.org/mesh/drug/name'
    mesh_disease_id = 'http://identifiers.org/mesh/disease/id'

    # Disease
    doid_curie = "doid"
    doid = "http://identifiers.org/doid"
    pharos_disease_name = "http://pharos.nih.gov/identifier/disease/name"

    # DRUG
Example #4
from ftplib import FTP
from greent import node_types
from greent.graph_components import KNode, LabeledID
from greent.service import Service
from greent.util import LoggingUtil
import logging, re, os, sys
from collections import defaultdict
from robokop_genetics.genetics_normalization import GeneticsNormalizer
from greent.export_delegator import WriterDelegator
from greent.rosetta import Rosetta

logger = LoggingUtil.init_logging(
    "robo-commons.builder.gwascatalog",
    logging.DEBUG,
    format='medium',
    logFilePath=f'{os.environ["ROBOKOP_HOME"]}/logs/')


class GWASCatalog(Service):
    def __init__(self, rosetta):
        self.is_cached_already = False
        self.genetics_normalizer = GeneticsNormalizer()
        self.rosetta = rosetta
        self.writer = WriterDelegator(rosetta)
        self.version = '2020/05/04'
        self.sequence_variant_export_labels = None

    def process_gwas(self):
        # main entry point
        gwas_file = self.get_gwas_file()
        self.parse_gwas_file(gwas_catalog=gwas_file)
Example #5
import requests
import json
from greent.service import Service
from greent.util import LoggingUtil

logger = LoggingUtil.init_logging(__name__)


class CHEMBL(Service):
    def __init__(self, name, context):
        super(CHEMBL, self).__init__(name, context)
        self.name = name

    def get_label(self, identifier):
        obj = requests.get(url=f"{self.url}/data/compound_record/{identifier}",
                           headers={
                               "Accept": "application/json"
                           }).json()
        return {"label": obj.get("compound_name", "")}
Example #6
from string import Template
import json
import os
import logging
from greent.service import Service
from greent.triplestore import TripleStore
from greent.util import LoggingUtil
from greent.util import Text
from greent import node_types
from pprint import pprint
import datetime
from collections import defaultdict
import time
import psycopg2

logger = LoggingUtil.init_logging(__name__, logging.INFO)


class OmniCorp(Service):
    def __init__(self, context):  #triplestore):
        super(OmniCorp, self).__init__("omnicorp", context)
        db = context.config['POSTGRES_DB']
        user = context.config['POSTGRES_USER']
        port = context.config['POSTGRES_PORT']
        host = context.config['POSTGRES_HOST']
        password = context.config['POSTGRES_PASSWORD']
        self.prefixes = set([
            'UBERON', 'BSPO', 'PATO', 'GO', 'MONDO', 'HP', 'ENVO', 'OBI', 'CL',
            'SO', 'CHEBI', 'HGNC', 'MESH'
        ])
        self.conn = psycopg2.connect(dbname=db,
                                     user=user,
                                     password=password,
                                     host=host,
                                     port=port)
Example #7
from greent.rosetta import Rosetta
from greent import node_types
from greent.graph_components import KNode
from greent.export_delegator import WriterDelegator
from greent.util import LoggingUtil
from greent.util import Text
from builder.gtex_utils import GTExUtils
from builder.question import LabeledID
import csv
import os

# declare a logger and initialize it.
import logging
logger = LoggingUtil.init_logging(
    "robo-commons.builder.GTExBuilder",
    logging.INFO,
    format='medium',
    logFilePath=f'{os.environ["ROBOKOP_HOME"]}/logs/')


##############
# Class: GTExBuilder
#
# By: Phil Owen
# Date: 5/21/2019
# Desc: Class that pre-loads significant GTEx data elements into a neo4j graph database.
##############
class GTExBuilder:
    #######
    # Constructor
    # param rosetta : Rosetta - project object for shared objects
Example #8
################
# This annotator will be used for all nodes regardless of their type:
# things like literary synonymization (collecting different names for a node),
# adding names,
# or anything we'd like to apply in the general sense.
################

from greent.annotators.annotator import Annotator
from builder.question import LabeledID
import asyncio
from greent.util import Text, LoggingUtil
import logging
import traceback
from greent import node_types
import requests

logger = LoggingUtil.init_logging(__name__, level=logging.DEBUG, format='medium')

class GenericAnnotator(Annotator):
    """
    Singleton class to perform our generic annotation tasks.
    """
    instance = None

    def __init__(self, rosetta):
        if not GenericAnnotator.instance:
            # assign on the class, not the instance, so the single inner
            # __GenericAnnotator (not shown in this snippet) is truly shared
            GenericAnnotator.instance = GenericAnnotator.__GenericAnnotator(rosetta)

    def __getattr__(self, name):
        # delegate all other attribute lookups to the shared inner instance
        return getattr(GenericAnnotator.instance, name)

    def annotate(self, node):
        # Overriding this method with the generic way
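
# A self-contained sketch of the delegating-singleton pattern used above,
# with made-up names (the real inner __GenericAnnotator class is not shown
# in this snippet):
class SingletonSketch:
    class _Impl:
        def __init__(self, value):
            self.value = value

    instance = None

    def __init__(self, value):
        if SingletonSketch.instance is None:
            # assign on the class so every wrapper shares one _Impl
            SingletonSketch.instance = SingletonSketch._Impl(value)

    def __getattr__(self, name):
        # reached only for attributes missing on the wrapper itself
        return getattr(SingletonSketch.instance, name)

a, b = SingletonSketch(1), SingletonSketch(2)
assert a.value == b.value == 1   # the second constructor call was a no-op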
Example #9
from greent.util import Text, LoggingUtil
from greent.graph_components import KNode
from greent import node_types
from greent.services.myvariant import MyVariant
from greent.export_delegator import WriterDelegator
from greent.cache import Cache
from builder.gtex_builder import GTExBuilder
from builder.question import LabeledID
from crawler.crawl_util import query_the_graph

import logging
import pickle
import os

logger = LoggingUtil.init_logging("robokop-interfaces.crawler.sequence_variants", level=logging.INFO, logFilePath=f'{os.environ["ROBOKOP_HOME"]}/logs/')

default_gtex_file = 'signif_variant_gene_pairs.csv'

def load_gwas_knowledge(rosetta: object, limit: int = None):
    synonymizer = rosetta.synonymizer
    gwas_catalog_dict = rosetta.core.gwascatalog.prepopulate_cache()
    counter = 0
    with WriterDelegator(rosetta) as writer:
        for variant_node, relationships in gwas_catalog_dict.items():
            if relationships:
                writer.write_node(variant_node)
                for (gwas_edge, phenotype_node) in relationships:
                    # these phenotypes are probably already in the DB, but not necessarily
                    writer.write_node(phenotype_node)
                    writer.write_edge(gwas_edge)
            else:
Example #10
import os
import logging
import requests
from greent.util import Resource, LoggingUtil, Text
from greent.graph_components import LabeledID
from collections import defaultdict
import itertools
from collections import OrderedDict
import urllib.parse

logger = LoggingUtil.init_logging(__name__, logging.DEBUG, 'short')


#TODO: should all of this be done with some sort of canned semantic tools?
class Concept:
    """ A semantic type or concept. A high level idea comprising one or more identifier namespace.
    Provides rudimentary notion of specialization via is_a. """
    def __init__(self, name, is_a, id_prefixes):
        self.name = name
        #Only a single parent?
        self.is_a = is_a
        is_a_name = is_a.name if is_a else None
        self.id_prefixes = [] if id_prefixes is None else id_prefixes

    def __repr__(self):
        return f"Concept(name={self.name},is_a={self.is_a is not None},id_prefixes={self.id_prefixes})"
        #return f"Concept(name={self.name},is_a={self.is_a},id_prefixes={self.id_prefixes})"

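# Illustrative use of the is_a specialization, with made-up concepts.
named_thing = Concept(name="named_thing", is_a=None, id_prefixes=None)
disease = Concept(name="disease", is_a=named_thing, id_prefixes=["MONDO", "DOID"])
print(disease)   # Concept(name=disease,is_a=True,id_prefixes=['MONDO', 'DOID'])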

class Relationship:
    """ A semantic type for a relationship (or slot). """
Example #11
import logging
from greent.graph_components import KEdge
from greent.util import Text
from greent.util import LoggingUtil
from greent import node_types
from collections import defaultdict
from datetime import datetime as dt

logger = LoggingUtil.init_logging (__file__, logging.DEBUG)

def get_supporter(greent):
    return OmnicorpSupport(greent)


class OmnicorpSupport():

    def __init__(self,greent):
        self.greent = greent
        self.omnicorp = greent.omnicorp

    def term_to_term(self,node_a,node_b):
        articles = self.omnicorp.get_shared_pmids(node_a, node_b)
        #logger.debug(f'OmniCorp {node_a.identifier} {node_b.identifier}')
        if len(articles) > 0:
            #logger.debug(f'    -> {len(articles)}')
            pmids = [f'PMID:{x.split("/")[-1]}' for x in articles]
            ke = KEdge('omnicorp.term_to_term', dt.now(), 'omnicorp:1', 'literature_co-occurence',
                       f'{node_a.identifier},{node_b.identifier}', 'omnicorp:1', 'literature_co-occurence',
                       publications=pmids, is_support=True)
            ke.source_node = node_a
            ke.target_node = node_b
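
# Illustrative: how the comprehension above turns article URIs into CURIEs.
articles = ["https://www.ncbi.nlm.nih.gov/pubmed/12345"]
assert [f'PMID:{x.split("/")[-1]}' for x in articles] == ["PMID:12345"]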
Example #12
import os
import pickle
import pika
import logging
import requests
from greent.util import LoggingUtil
from greent.export import BufferedWriter
from greent.annotators.annotator_factory import annotate_shortcut
import traceback

logger = LoggingUtil.init_logging(
    "builder.writer_delegate",
    level=logging.DEBUG,
    logFilePath=f'{os.environ["ROBOKOP_HOME"]}/logs/')


class WriterDelegator:
    def __init__(self, rosetta, push_to_queue=False):
        self.rosetta = rosetta
        self.synonymizer = rosetta.synonymizer
        response = requests.get(f"{os.environ['BROKER_API']}queues/")
        queues = response.json()
        num_consumers = [
            q['consumers'] for q in queues if q['name'] == 'neo4j'
        ]
        if (num_consumers and num_consumers[0]) or push_to_queue:
            self.connection = pika.BlockingConnection(
                pika.ConnectionParameters(
                    heartbeat=0,
                    host=os.environ['BROKER_HOST'],
                    virtual_host='builder',
                    # completed from the identical connection setup in Example #14
                    credentials=pika.credentials.PlainCredentials(
                        os.environ['BROKER_USER'],
                        os.environ['BROKER_PASSWORD'])))
Example #13
import logging
import os
import requests
from greent.util import LoggingUtil
import asyncio
from greent.annotators.chemical_annotator import ChemicalAnnotator
from crawler.chebi import pull_chebi, chebi_sdf_entry_to_dict
from crawler.pullers import pull_uniprot
from crawler.pullers import pull_iuphar
import ftplib
import pandas
import urllib
import gzip
from crawler.big_gz_sort import batch_sort
from collections import defaultdict

logger = LoggingUtil.init_logging(
    "robokop-interfaces.crawler.chemicals",
    logging.DEBUG,
    format='medium',
    logFilePath=f'{os.environ["ROBOKOP_HOME"]}/logs/')

#def pull(location, directory, filename):
#    data = pull_via_ftp(location, directory, filename)
#    rdf = decompress(data).decode()
#    return rdf


def make_mesh_id(mesh_uri):
    return f"mesh:{mesh_uri.split('/')[-1][:-1]}"


def pull_mesh_chebi():
    url = 'https://query.wikidata.org/sparql?format=json&query=SELECT ?chebi ?mesh WHERE { ?compound wdt:P683 ?chebi . ?compound wdt:P486 ?mesh. }'
Example #14
import os
import sys
from time import sleep, strftime
from datetime import datetime
import logging
import json

import pika

from greent.util import LoggingUtil
from greent.export import BufferedWriter
from builder.buildmain import setup
from greent.graph_components import KNode, KEdge
from builder.api import logging_config

logger = LoggingUtil.init_logging("builder.writer", level=logging.DEBUG)

greent_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
sys.path.insert(0, greent_path)
rosetta = setup(os.path.join(greent_path, 'greent', 'greent.conf'))

connection = pika.BlockingConnection(pika.ConnectionParameters(host=os.environ['BROKER_HOST'],
    virtual_host='builder',
    credentials=pika.credentials.PlainCredentials(os.environ['BROKER_USER'], os.environ['BROKER_PASSWORD'])))
channel = connection.channel()

channel.queue_declare(queue='neo4j')

writer = BufferedWriter(rosetta)

def callback(ch, method, properties, body):
Example #15
import logging
import os
import sys

import pika

from greent.util import LoggingUtil
from greent.export import BufferedWriter
from builder.buildmain import setup
from greent.graph_components import KNode, KEdge
from builder.api import logging_config
from pika.exceptions import StreamLostError
import threading
from functools import partial

greent_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
logger = LoggingUtil.init_logging("builder.writer",
                                  level=logging.DEBUG,
                                  logFilePath=os.path.join(
                                      greent_path, '..', 'logs',
                                      'builder.writer'))

sys.path.insert(0, greent_path)
rosetta = setup(os.path.join(greent_path, 'greent', 'greent.conf'))


def callback_wrapper(ch, method, properties, body, writer):
    ## This will create a thread for the handler, run it, and let it finish,
    ## to avoid blocking the rabbit heartbeat.
    # We found that rabbitmq will reset connections on channels: neo4j-related
    # work sometimes takes long enough that the broker decides this client is
    # no longer active and kills the connection. So, to avoid dropping any
    # incoming data that might not have been written yet, we can run the
    # handler on its own thread.
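
# A minimal sketch of the threaded-handler idea described above, independent
# of this codebase: the pika callback returns quickly while the slow work runs
# on a worker thread, so broker heartbeats keep flowing.
def slow_handler(body, writer):
    pass   # long-running neo4j writes would happen here

def callback_sketch(ch, method, properties, body, writer):
    worker = threading.Thread(target=partial(slow_handler, body, writer))
    worker.start()   # return immediately; pika's I/O loop stays responsive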
Example #16
import requests
from ftplib import FTP
from greent import node_types
from greent.graph_components import KNode, LabeledID
from greent.service import Service
from greent.util import Text, LoggingUtil
import logging, json, pickle, re, os, sys
from collections import defaultdict
from greent.cache import Cache

logger = LoggingUtil.init_logging(
    "robokop-interfaces.services.gwascatalog",
    logging.DEBUG,
    format='medium',
    logFilePath=f'{os.environ["ROBOKOP_HOME"]}/logs/')


class GWASCatalog(Service):
    def __init__(self, context, rosetta):
        super(GWASCatalog, self).__init__("gwascatalog", context)
        self.is_cached_already = False
        self.synonymizer = rosetta.synonymizer

    def prepopulate_cache(self):
        query_url = 'ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-associations_ontology-annotated.tsv'
        ftpsite = 'ftp.ebi.ac.uk'
        ftpdir = '/pub/databases/gwas/releases/latest'
        ftpfile = 'gwas-catalog-associations_ontology-annotated.tsv'
        ftp = FTP(ftpsite)
        ftp.login()
        ftp.cwd(ftpdir)
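        # The snippet is truncated here; a plausible continuation streams the
        # TSV line by line with ftplib's RETR (the variable name is an assumption).
        associations = []
        ftp.retrlines(f'RETR {ftpfile}', associations.append)
        ftp.quit()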