Exemplo n.º 1
0
 def organism_version(cls, name):
     """Return the ``"datetime"`` entry of the gene info file's server info.

     *name* is resolved with ``cls.organism_name_search`` and the result
     is used to build the ``gene_info.<id>.db`` file name in the
     ``NCBI_geneinfo`` domain.
     """
     organism_id = cls.organism_name_search(name)
     db_name = "gene_info.%s.db" % organism_id
     # FIXME: dirty hack -- request the local path first so that the file
     # is downloaded before its info is queried.
     orngServerFiles.localpath_download("NCBI_geneinfo", db_name)
     file_info = orngServerFiles.info("NCBI_geneinfo", db_name)
     return file_info["datetime"]
Exemplo n.º 2
0
    def __init__(self, organism, genematcher=None):
        """ A dictionary like object for accessing NCBI gene info.

        Arguments::
                - *organism*    Organism id (resolved with
                                ``organism_name_search``)
                - *genematcher* Optional gene matcher; when omitted a
                                default matcher is built for the organism

        Example::
            >>> info = NCBIGeneInfo("Homo sapiens")
        """

        self.taxid = self.organism_name_search(organism)

        fname = orngServerFiles.localpath_download(
            "NCBI_geneinfo", "gene_info.%s.db" % self.taxid)
        # Read everything up front and close the handle deterministically
        # instead of leaving it to the garbage collector.
        with open(fname, "rb") as gene_file:
            lines = gene_file.read().splitlines()

        # Each record is stored as its raw line, keyed by the second
        # tab-separated field; blank lines and '#' comments are skipped.
        self.update(dict([(line.split("\t", 3)[1], line)
                          for line in lines
                          if line.strip() and not line.startswith("#")]))

        # NOTE orig init time for gene matcher: 2.5s, new 4s: investigate the slowdown
        # NOTE matches are not the same because aliases are build a bit
        # differently (main name versus old aliases conflict!)

        self.matcher = genematcher
        if self.matcher is None:
            if self.taxid == '352472':
                # This taxonomy id additionally gets the GMDicty matcher,
                # both on its own and combined with the NCBI matcher.
                self.matcher = matcher([GMNCBI(self.taxid), GMDicty(),
                                        [GMNCBI(self.taxid), GMDicty()]])
            else:
                self.matcher = matcher([GMNCBI(self.taxid)])

        #if this is done with a gene matcher, pool target names
        self.matcher.set_targets(self.keys())
Exemplo n.º 3
0
 def history(self):
     """Populate ``self._history`` from the NCBI gene history file.

     Nothing is done when ``self._history`` is already set. Otherwise the
     ``gene_history.<taxid>.db`` file is read and every line is stored as
     a ``GeneHistory`` object keyed by its third tab-separated field.
     Any failure while reading or parsing is reported on stderr and
     leaves ``self._history`` as an empty dict.
     """
     if getattr(self, "_history", None) is None:
         fname = orngServerFiles.localpath_download("NCBI_geneinfo", "gene_history.%s.db" % self.taxid)
         try:
             # Close the file as soon as it is read.
             with open(fname, "rb") as history_file:
                 lines = history_file.read().splitlines()
             self._history = dict([(line.split("\t")[2], GeneHistory(line)) for line in lines])

         except Exception as ex:
             # Best effort: report the problem (the original wrote to the
             # misspelled ``sys.srderr`` and crashed here) and fall back
             # to an empty history.
             sys.stderr.write("Loading NCBI gene history failed. %s\n" % (ex,))
             self._history = {}
Exemplo n.º 4
0
 def load(self):
     """Load the homolog table into ``_homologs`` and ``_homologs_by_group``.

     The file ``self.FILENAME`` from the ``self.DOMAIN`` server-file
     domain is parsed line by line with ``_homolog``. ``_homologs`` maps
     ``(taxonomy_id, gene_symbol)`` to the homolog record and
     ``_homologs_by_group`` maps each ``group_id`` to the list of its
     records.
     """
     path = orngServerFiles.localpath_download(self.DOMAIN, self.FILENAME)
     with open(path, "rb") as homolog_file:
         # The last line is deliberately left out.
         # NOTE(review): presumably a trailing non-record line -- confirm
         # against the file format.
         lines = homolog_file.read().splitlines()[:-1]

     self._homologs = dict([((h.taxonomy_id, h.gene_symbol), h)
                            for h in [_homolog(line) for line in lines]])

     # Group the (de-duplicated) records by their homology group id.
     by_group = defaultdict(list)
     for h in self._homologs.values():
         by_group[h.group_id].append(h)
     self._homologs_by_group = by_group
Exemplo n.º 5
0
 def _db(self, taxid=None):
     """ Open and return a sqlite3.Connection to the PPI database file.

     *taxid* falls back to ``self.taxid`` when it is not given (or is
     otherwise falsy). Raises ValueError if the database file does not
     exist locally.
     """
     if not taxid:
         taxid = self.taxid
     server_file = self.SERVER_FILE.format(taxid=taxid)
     db_path = orngServerFiles.localpath_download("PPI", server_file)
     if os.path.exists(db_path):
         return sqlite3.connect(db_path)
     raise ValueError("Database is missing.")
Exemplo n.º 6
0
    def _db(self, taxid=None):
        """ Open and return a sqlite3.Connection to the PPI database file.

        *taxid* falls back to ``self.taxid`` when it is not given (or is
        otherwise falsy). Raises ValueError if the database file does not
        exist locally.
        """
        if not taxid:
            taxid = self.taxid
        server_file = self.SERVER_FILE.format(taxid=taxid)
        db_path = orngServerFiles.localpath_download("PPI", server_file)
        if os.path.exists(db_path):
            return sqlite3.connect(db_path)
        raise ValueError("Database is missing.")
Exemplo n.º 7
0
    def __loadOntologyFromDisk(self):
        """
        Function loads MeSH ontology and chemical annotation into internal data structures.

        Fills ``toID``, ``toName`` and ``toDesc`` from 'mesh-ontology.dat'
        and ``fromCID`` from 'cid-annotation.dat'; ``fromPMID`` is created
        empty (loading it is still a TODO). Lines with an unexpected number
        of fields are reported, and lines with too few fields are skipped.
        Always returns True.
        """
        self.toID = dict()  # name -> [IDs] Be careful !!! One name can match many IDs!
        self.toName = dict()  # ID -> name
        self.toDesc = dict()  # name -> description
        self.fromCID = dict()  # cid -> term id
        self.fromPMID = dict()  # pmid -> term id

        ontology_path = orngServerFiles.localpath_download('MeSH', 'mesh-ontology.dat')
        annotation_path = orngServerFiles.localpath_download('MeSH', 'cid-annotation.dat')

        # loading ontology graph
        with open(ontology_path) as ontology_file:
            for line in ontology_file:
                parts = line.split("\t")  # delimiters are tabs
                if len(parts) != 3:
                    print "error reading ontology ", parts[0]
                    if len(parts) < 3:
                        # Too few fields to index below; skip the line
                        # instead of failing with an IndexError.
                        continue
                parts[2] = parts[2].rstrip("\n\r")
                ids = parts[1].split(";")
                self.toID[parts[0]] = ids  # append additional ID
                self.toDesc[parts[0]] = parts[2]
                for term_id in ids:
                    self.toName[term_id] = parts[0]

        # loading cid -> mesh
        with open(annotation_path) as annotation_file:
            for line in annotation_file:
                parts = line.split(";")  # delimiters are semicolons
                if len(parts) != 2:
                    print "error reading ontology ", parts[0]
                    if len(parts) < 2:
                        # No term field to read; skip the line.
                        continue
                parts[1] = parts[1].rstrip("\n\r")
                cid = int(parts[0])
                # One cid can be annotated with several terms.
                self.fromCID.setdefault(cid, []).append(parts[1])

        # loading pmid -> mesh, TODO
        return True
Exemplo n.º 8
0
    def history(self):
        """Populate ``self._history`` from the NCBI gene history file.

        Nothing is done when ``self._history`` is already set. Otherwise
        the ``gene_history.<taxid>.db`` file is read and every line is
        stored as a ``GeneHistory`` object keyed by its third
        tab-separated field. Any failure while reading or parsing is
        reported on stderr and leaves ``self._history`` as an empty dict.
        """
        if getattr(self, "_history", None) is None:
            fname = orngServerFiles.localpath_download(
                "NCBI_geneinfo", "gene_history.%s.db" % self.taxid)
            try:
                # Close the file as soon as it is read.
                with open(fname, "rb") as history_file:
                    lines = history_file.read().splitlines()
                self._history = dict([
                    (line.split("\t")[2], GeneHistory(line))
                    for line in lines
                ])

            except Exception as ex:
                # Best effort: report the problem (the original wrote to
                # the misspelled ``sys.srderr`` and crashed here) and fall
                # back to an empty history.
                sys.stderr.write(
                    "Loading NCBI gene history failed. %s\n" % (ex,))
                self._history = {}
Exemplo n.º 9
0
 def load(self):
     """Load the pickled gene info, rebuilding it when that fails.

     The pickle named by ``self.filename()`` is fetched from the
     ``EnsembleGeneInfo`` domain and unpickled. If downloading or
     unpickling raises, the info is recreated with ``self.create_info()``
     and pickled to the local path for that file.
     """
     import cPickle
     dir = orngServerFiles.localpath("EnsembleGeneInfo")
     if not os.path.exists(dir):
         os.makedirs(dir)

     try:
         filename = orngServerFiles.localpath_download("EnsembleGeneInfo", self.filename())
         # NOTE(review): unpickling executes code from the file; the
         # downloaded data must come from a trusted server.
         with open(filename, "rb") as stream:
             info = cPickle.load(stream)
     except Exception:
         # Deliberate best effort: any failure falls back to rebuilding
         # the info and caching it locally.
         filename = orngServerFiles.localpath("EnsembleGeneInfo", self.filename())
         info = self.create_info()
         # Close explicitly so the pickle is fully flushed to disk.
         with open(filename, "wb") as stream:
             cPickle.dump(info, stream)
Exemplo n.º 10
0
def load_miRNA_microCosm(org="mus_musculus", max_pvalue=None, min_score=None):
    """ Parse the microcosm v5 target file for *org* and publish the
    result through this module's global variables (currently only
    Mus musculus is supported).

    *max_pvalue* and *min_score* are passed on unchanged to
    ``parse_targets_microcosm_v5``. The pre-miRNA and cluster globals
    are reset to empty containers.
    """
    global IDs, LABELS, miRNA_lib, mat_toPre, ACCtoID
    global preIDs, premiRNA_lib, preACCtoID, clusters
    global num_toClusters,  clusters_toNum

    targets_path = osf.localpath_download("miRNA", "v5.txt.{org}".format(org=org))
    parsed = parse_targets_microcosm_v5(targets_path,
                                        max_pvalue=max_pvalue,
                                        min_score=min_score)
    IDs, LABELS, miRNA_lib, mat_toPre, ACCtoID = parsed

    # microcosm provides no pre-miRNA or cluster data: start these empty.
    preIDs = []
    premiRNA_lib = {}
    preACCtoID = {}
    clusters = {}
    num_toClusters = {}
    clusters_toNum = {}
Exemplo n.º 11
0
    def load(self):
        """Load the pickled gene info, rebuilding it when that fails.

        The pickle named by ``self.filename()`` is fetched from the
        ``EnsembleGeneInfo`` domain and unpickled. If downloading or
        unpickling raises, the info is recreated with
        ``self.create_info()`` and pickled to the local path for that file.
        """
        import cPickle
        dir = orngServerFiles.localpath("EnsembleGeneInfo")
        if not os.path.exists(dir):
            os.makedirs(dir)

        try:
            filename = orngServerFiles.localpath_download(
                "EnsembleGeneInfo", self.filename())
            # NOTE(review): unpickling executes code from the file; the
            # downloaded data must come from a trusted server.
            with open(filename, "rb") as stream:
                info = cPickle.load(stream)
        except Exception:
            # Deliberate best effort: any failure falls back to rebuilding
            # the info and caching it locally.
            filename = orngServerFiles.localpath("EnsembleGeneInfo",
                                                 self.filename())
            info = self.create_info()
            # Close explicitly so the pickle is fully flushed to disk.
            with open(filename, "wb") as stream:
                cPickle.dump(info, stream)
Exemplo n.º 12
0
def load_miRNA_microCosm(org="mus_musculus", max_pvalue=None, min_score=None):
    """ Parse the microcosm v5 target file for *org* and publish the
    result through this module's global variables (currently only
    Mus musculus is supported).

    *max_pvalue* and *min_score* are passed on unchanged to
    ``parse_targets_microcosm_v5``. The pre-miRNA and cluster globals
    are reset to empty containers.
    """
    global IDs, LABELS, miRNA_lib, mat_toPre, ACCtoID
    global preIDs, premiRNA_lib, preACCtoID, clusters
    global num_toClusters, clusters_toNum

    targets_path = osf.localpath_download("miRNA", "v5.txt.{org}".format(org=org))
    parsed = parse_targets_microcosm_v5(targets_path,
                                        max_pvalue=max_pvalue,
                                        min_score=min_score)
    IDs, LABELS, miRNA_lib, mat_toPre, ACCtoID = parsed

    # microcosm provides no pre-miRNA or cluster data: start these empty.
    preIDs = []
    premiRNA_lib = {}
    preACCtoID = {}
    clusters = {}
    num_toClusters = {}
    clusters_toNum = {}
Exemplo n.º 13
0
    def __init__(self, organism, genematcher=None):
        """ A dictionary like object for accessing NCBI gene info.

        Arguments::
                - *organism*    Organism id (resolved with
                                ``organism_name_search``)
                - *genematcher* Optional gene matcher; when omitted a
                                default matcher is built for the organism

        Example::
            >>> info = NCBIGeneInfo("Homo sapiens")
        """

        self.taxid = self.organism_name_search(organism)

        fname = orngServerFiles.localpath_download(
            "NCBI_geneinfo", "gene_info.%s.db" % self.taxid)
        # Read everything up front and close the handle deterministically
        # instead of leaving it to the garbage collector.
        with open(fname, "rb") as gene_file:
            lines = gene_file.read().splitlines()

        # Each record is stored as its raw line, keyed by the second
        # tab-separated field; blank lines and '#' comments are skipped.
        self.update(
            dict([(line.split("\t", 3)[1], line)
                  for line in lines
                  if line.strip() and not line.startswith("#")]))

        # NOTE orig init time for gene matcher: 2.5s, new 4s: investigate the slowdown
        # NOTE matches are not the same because aliases are build a bit
        # differently (main name versus old aliases conflict!)

        self.matcher = genematcher
        if self.matcher is None:
            if self.taxid == '352472':
                # This taxonomy id additionally gets the GMDicty matcher,
                # both on its own and combined with the NCBI matcher.
                self.matcher = matcher([
                    GMNCBI(self.taxid),
                    GMDicty(), [GMNCBI(self.taxid),
                                GMDicty()]
                ])
            else:
                self.matcher = matcher([GMNCBI(self.taxid)])

        #if this is done with a gene matcher, pool target names
        self.matcher.set_targets(self.keys())
Exemplo n.º 14
0
def list_serverfiles():
    """Return the server file listing built from the pickled index.

    The ``index.pck`` file of the ``sfdomain`` domain is unpickled and
    handed to ``list_serverfiles_from_flist``.
    """
    fname = orngServerFiles.localpath_download(sfdomain, "index.pck")
    # Pickles are binary data: open in "rb" so no newline translation can
    # corrupt them, and close the handle once loaded.
    # NOTE(review): unpickling executes code from the file; the index
    # must come from a trusted server.
    with open(fname, 'rb') as index_file:
        flist = pickle.load(index_file)
    return list_serverfiles_from_flist(flist)
Exemplo n.º 15
0
from __future__ import absolute_import, division

from collections import defaultdict
import math, os, random, re, urllib

from Orange.orng import orngServerFiles as osf
import statc

from . import gene as ge, go, kegg as kg, utils, taxonomy as obiTaxonomy

# Short module-level alias for ``utils.stats``.
op = utils.stats

# Local paths of the miRNA and pre-miRNA data files, resolved once when the
# module is imported.
# NOTE(review): localpath_download presumably fetches the files when they are
# not available locally, which would make importing this module hit the
# network -- confirm.
mirnafile = osf.localpath_download('miRNA', 'miRNA.txt')
premirnafile = osf.localpath_download('miRNA', 'premiRNA.txt')

################################################################################################################
################################################################################################################


def __build_lib(filename,
                labels=True,
                MATtoPRE=True,
                ACCtoID=True,
                clust=False):
    """
    build_lib() function takes as input a filename
    and gives as output some variables there will be used in
    the module.
    """
    content = [l.rstrip() for l in open(filename).readlines()][1:]
    to_return = []
Exemplo n.º 16
0
 def create_aliases_version(self):
     """Return the ``"datetime"`` entry of the organism's Affy pickle info.

     The local path is requested first so that the file is downloaded
     before its info is queried.
     """
     pickle_name = self.organism + ".pickle"
     orngServerFiles.localpath_download("Affy", pickle_name)
     file_info = orngServerFiles.info("Affy", pickle_name)
     return file_info["datetime"]
Exemplo n.º 17
0
 def create_aliases(self):
     """Return the object unpickled from the organism's Affy pickle file.

     The file ``<organism>.pickle`` is fetched from the ``Affy`` domain.
     """
     filename = orngServerFiles.localpath_download(
         "Affy", self.organism + ".pickle")
     import cPickle
     # Close the file once loaded instead of leaking the handle.
     # NOTE(review): unpickling executes code from the file; the
     # downloaded data must come from a trusted server.
     with open(filename, "rb") as stream:
         return cPickle.load(stream)
Exemplo n.º 18
0
 def organism_version(cls, name):
     """Return the ``"datetime"`` entry of the gene info file's server info.

     *name* is resolved with ``cls.organism_name_search`` and the result
     is used to build the ``gene_info.<id>.db`` file name in the
     ``NCBI_geneinfo`` domain.
     """
     organism_id = cls.organism_name_search(name)
     db_name = "gene_info.%s.db" % organism_id
     # FIXME: dirty hack -- request the local path first so that the file
     # is downloaded before its info is queried.
     orngServerFiles.localpath_download("NCBI_geneinfo", db_name)
     file_info = orngServerFiles.info("NCBI_geneinfo", db_name)
     return file_info["datetime"]
Exemplo n.º 19
0
 def create_aliases_version(self):
     """Return the ``"datetime"`` entry of the organism's Affy pickle info.

     The local path is requested first so that the file is downloaded
     before its info is queried.
     """
     pickle_name = self.organism + ".pickle"
     orngServerFiles.localpath_download("Affy", pickle_name)
     file_info = orngServerFiles.info("Affy", pickle_name)
     return file_info["datetime"]
Exemplo n.º 20
0
 def create_aliases(self):
     """Return the object unpickled from the organism's Affy pickle file.

     The file ``<organism>.pickle`` is fetched from the ``Affy`` domain.
     """
     filename = orngServerFiles.localpath_download("Affy", self.organism + ".pickle")
     import cPickle
     # Close the file once loaded instead of leaking the handle.
     # NOTE(review): unpickling executes code from the file; the
     # downloaded data must come from a trusted server.
     with open(filename, "rb") as stream:
         return cPickle.load(stream)
Exemplo n.º 21
0
def load_serverfiles(hierarchy, organism):
    """Load the data for *hierarchy* / *organism* via ``load_fn``.

    ``load_fn`` receives ``list_serverfiles`` as the listing function and
    a resolver that maps a (hierarchy, organism) pair to the local path
    of its file in the ``sfdomain`` domain.
    """
    def resolve_path(h, o):
        return orngServerFiles.localpath_download(sfdomain, filename(h, o))

    return load_fn(hierarchy, organism, list_serverfiles, resolve_path)
Exemplo n.º 22
0
 def load(self):
     """Load the homolog table into ``_homologs`` and ``_homologs_by_group``.

     The file ``self.FILENAME`` from the ``self.DOMAIN`` server-file
     domain is parsed line by line with ``_homolog``. ``_homologs`` maps
     ``(taxonomy_id, gene_symbol)`` to the homolog record and
     ``_homologs_by_group`` maps each ``group_id`` to the list of its
     records.
     """
     path = orngServerFiles.localpath_download(self.DOMAIN, self.FILENAME)
     with open(path, "rb") as homolog_file:
         # The last line is deliberately left out.
         # NOTE(review): presumably a trailing non-record line -- confirm
         # against the file format.
         lines = homolog_file.read().splitlines()[:-1]

     self._homologs = dict([((h.taxonomy_id, h.gene_symbol), h) for h in [_homolog(line) for line in lines]])

     # Group the (de-duplicated) records by their homology group id.
     by_group = defaultdict(list)
     for h in self._homologs.values():
         by_group[h.group_id].append(h)
     self._homologs_by_group = by_group
Exemplo n.º 23
0
def list_serverfiles():
    """Return the server file listing built from the pickled index.

    The ``index.pck`` file of the ``sfdomain`` domain is unpickled and
    handed to ``list_serverfiles_from_flist``.
    """
    fname = orngServerFiles.localpath_download(sfdomain, "index.pck")
    # Pickles are binary data: open in "rb" so no newline translation can
    # corrupt them, and close the handle once loaded.
    # NOTE(review): unpickling executes code from the file; the index
    # must come from a trusted server.
    with open(fname, 'rb') as index_file:
        flist = pickle.load(index_file)
    return list_serverfiles_from_flist(flist)
Exemplo n.º 24
0
from __future__ import absolute_import, division

from collections import defaultdict
import math, os, random, re, urllib

from Orange.orng import orngServerFiles as osf
import statc

from . import gene as ge, go, kegg as kg, utils, taxonomy as obiTaxonomy

# Short module-level alias for ``utils.stats``.
op = utils.stats

# Local paths of the miRNA and pre-miRNA data files, resolved once when the
# module is imported.
# NOTE(review): localpath_download presumably fetches the files when they are
# not available locally, which would make importing this module hit the
# network -- confirm.
mirnafile = osf.localpath_download('miRNA','miRNA.txt')
premirnafile = osf.localpath_download('miRNA','premiRNA.txt')

################################################################################################################
################################################################################################################

def __build_lib(filename, labels=True, MATtoPRE=True, ACCtoID=True, clust=False):
    """
    build_lib() function takes as input a filename
    and gives as output some variables there will be used in
    the module.
    """
    content = [l.rstrip() for l in open(filename).readlines()][1:]
    to_return = []
    
    ids = [l.split('\t')[0] for l in content]
    to_return.append(ids)
    
    if labels: 
Exemplo n.º 25
0
 def __init__(self):
     """Open a sqlite3 connection to the InParanoid database file.

     The ``InParanoid.sqlite`` file of the ``HomoloGene`` domain is
     located with ``orngServerFiles.localpath_download`` and the
     connection is kept on ``self.con``.
     """
     import sqlite3
     db_path = orngServerFiles.localpath_download("HomoloGene",
                                                  "InParanoid.sqlite")
     self.con = sqlite3.connect(db_path)
Exemplo n.º 26
0
def load_serverfiles(hierarchy, organism):
    """Load the data for *hierarchy* / *organism* via ``load_fn``.

    ``load_fn`` receives ``list_serverfiles`` as the listing function and
    a resolver that maps a (hierarchy, organism) pair to the local path
    of its file in the ``sfdomain`` domain.
    """
    def resolve_path(h, o):
        return orngServerFiles.localpath_download(sfdomain, filename(h, o))

    return load_fn(hierarchy, organism, list_serverfiles, resolve_path)
Exemplo n.º 27
0
 def __init__(self):
     """Open a sqlite3 connection to the InParanoid database file.

     The ``InParanoid.sqlite`` file of the ``HomoloGene`` domain is
     located with ``orngServerFiles.localpath_download`` and the
     connection is kept on ``self.con``.
     """
     import sqlite3
     db_path = orngServerFiles.localpath_download("HomoloGene", "InParanoid.sqlite")
     self.con = sqlite3.connect(db_path)