示例#1
0
文件: Taxon.py 项目: Moxikai/pubMunch
class Tree(object):
    """Holds the records parsed from the names.dmp and nodes.dmp files of a directory."""

    def __init__(self, dmpDir):
        """Load dmpDir/names.dmp first, then dmpDir/nodes.dmp."""
        self._loadNames(dmpDir)
        self._loadNodes(dmpDir)

    def _loadNames(self, dmpDir):
        """Fill self.names and self.sciNames from dmpDir/names.dmp.

        self.names is a MultiDict keyed by taxId; self.sciNames maps nameTxt to
        taxId for the rows whose nameClass is "scientific name".
        """
        self.names = MultiDict()
        self.sciNames = dict()
        for fields in dmpFileParser(dmpDir + "/names.dmp"):
            entry = Name(fields)
            # NOTE(review): plain item assignment here, whereas _loadNodes fills
            # its MultiDict through add() -- confirm this is intended.
            self.names[entry.taxId] = entry
            if entry.nameClass == "scientific name":
                self.sciNames[entry.nameTxt] = entry.taxId

    def _loadNodes(self, dmpDir):
        """Fill self.nodes and self.parentNodeRefs from dmpDir/nodes.dmp.

        self.nodes maps taxId to its Node; self.parentNodeRefs collects the
        nodes under their parentTaxId.
        """
        self.nodes = dict()
        self.parentNodeRefs = MultiDict()
        for fields in dmpFileParser(dmpDir + "/nodes.dmp"):
            entry = Node(fields)
            self.nodes[entry.taxId] = entry
            self.parentNodeRefs.add(entry.parentTaxId, entry)
示例#2
0
class Tree(object):
    """Container for the rows read from names.dmp and nodes.dmp in one directory."""

    def _loadNames(self, dmpDir):
        """Parse dmpDir/names.dmp into self.names and self.sciNames.

        Every row becomes a Name stored in self.names under its taxId; rows
        with nameClass "scientific name" also map nameTxt -> taxId in
        self.sciNames.
        """
        self.names = MultiDict()
        self.sciNames = dict()
        wantedClass = "scientific name"
        parser = dmpFileParser(dmpDir + "/names.dmp")
        for rec in parser:
            nm = Name(rec)
            # NOTE(review): item assignment on a MultiDict, unlike the add()
            # call used by _loadNodes -- confirm this is intended.
            self.names[nm.taxId] = nm
            if nm.nameClass == wantedClass:
                self.sciNames[nm.nameTxt] = nm.taxId

    def _loadNodes(self, dmpDir):
        """Parse dmpDir/nodes.dmp into self.nodes and self.parentNodeRefs.

        self.nodes is keyed by taxId; self.parentNodeRefs groups the nodes by
        their parentTaxId.
        """
        self.nodes = dict()
        self.parentNodeRefs = MultiDict()
        parser = dmpFileParser(dmpDir + "/nodes.dmp")
        for rec in parser:
            nd = Node(rec)
            self.nodes[nd.taxId] = nd
            self.parentNodeRefs.add(nd.parentTaxId, nd)

    def __init__(self, dmpDir):
        """Read the names file and then the nodes file found in dmpDir."""
        self._loadNames(dmpDir)
        self._loadNodes(dmpDir)
示例#3
0
class GenePredTbl(list):
    """List of the rows yielded by GenePredReader for a file, with optional indices."""

    def __init__(self, fileName, buildIdx=False, buildUniqIdx=False, buildRangeIdx=False):
        """Load every row of fileName and build the requested indices.

        buildIdx and buildUniqIdx both populate self.names and are mutually
        exclusive; buildRangeIdx populates self.rangeMap.  Indices that were
        not requested are left as None.
        """
        if buildIdx and buildUniqIdx:
            raise Exception("can't specify both buildIdx and buildUniqIdx")
        self.extend(GenePredReader(fileName))
        self.names = self.rangeMap = None
        if buildUniqIdx:
            self.__buildUniqIdx()
        if buildIdx:
            self.__buildIdx()
        if buildRangeIdx:
            self.__buildRangeIdx()

    def __buildUniqIdx(self):
        """Index rows by name in a plain dict, raising on a repeated name."""
        uniq = dict()
        self.names = uniq
        for gene in self:
            key = gene.name
            if key in uniq:
                raise Exception("gene with this name already in index: " + key)
            uniq[key] = gene

    def __buildIdx(self):
        """Index rows by name in a MultiDict (imported lazily)."""
        from pycbio.sys.MultiDict import MultiDict
        byName = self.names = MultiDict()
        for gene in self:
            byName.add(gene.name, gene)

    def __buildRangeIdx(self):
        """Register each row in a RangeFinder (imported lazily) by chrom, txStart, txEnd and strand."""
        from pycbio.hgdata.RangeFinder import RangeFinder
        finder = self.rangeMap = RangeFinder()
        for rec in self:
            finder.add(rec.chrom, rec.txStart, rec.txEnd, rec, rec.strand)
示例#4
0
 def _loadNodes(self, dmpDir):
     """Fill self.nodes (taxId -> Node) and self.parentNodeRefs (nodes grouped
     by parentTaxId) from dmpDir/nodes.dmp."""
     self.nodes = dict()
     self.parentNodeRefs = MultiDict()
     for fields in dmpFileParser(dmpDir + "/nodes.dmp"):
         entry = Node(fields)
         self.nodes[entry.taxId] = entry
         self.parentNodeRefs.add(entry.parentTaxId, entry)
示例#5
0
class PslTbl(list):
    """Table of PSL objects loaded from a tab-file.

    The table is a list of the rows yielded by PslReader.  Optional MultiDict
    indices by qName (self.qNameMap) and tName (self.tNameMap) are built on
    request; an index that was not requested is None, and the lookup methods
    for it must not be used.
    """

    def __mkQNameIdx(self):
        "build self.qNameMap, indexing every row by its qName"
        self.qNameMap = MultiDict()
        for psl in self:
            self.qNameMap.add(psl.qName, psl)

    def __mkTNameIdx(self):
        "build self.tNameMap, indexing every row by its tName"
        self.tNameMap = MultiDict()
        for psl in self:
            self.tNameMap.add(psl.tName, psl)

    def __init__(self, fileName, qNameIdx=False, tNameIdx=False):
        """Load all rows from fileName; build the qName and/or tName index
        when the matching flag is true."""
        for psl in PslReader(fileName):
            self.append(psl)
        self.qNameMap = self.tNameMap = None
        if qNameIdx:
            self.__mkQNameIdx()
        if tNameIdx:
            self.__mkTNameIdx()

    def getQNameIter(self):
        "get an iterator over the keys of the qName index"
        return self.qNameMap.iterkeys()

    def haveQName(self, qName):
        "determine if the qName index has an entry for qName"
        return self.qNameMap.get(qName) is not None

    def getByQName(self, qName):
        """generator to get all PSL with a given qName"""
        ent = self.qNameMap.get(qName)
        if ent is not None:
            # an index entry is either one object or a list of them
            if isinstance(ent, list):
                for psl in ent:
                    yield psl
            else:
                yield ent

    def getTNameIter(self):
        "get an iterator over the keys of the tName index"
        return self.tNameMap.iterkeys()

    def haveTName(self, tName):
        "determine if the tName index has an entry for tName"
        # Fixed: the lookup previously used the undefined name `qName`, so
        # every call raised NameError.
        return self.tNameMap.get(tName) is not None

    def getByTName(self, tName):
        """generator to get all PSL with a given tName"""
        ent = self.tNameMap.get(tName)
        if ent is not None:
            # an index entry is either one object or a list of them
            if isinstance(ent, list):
                for psl in ent:
                    yield psl
            else:
                yield ent
示例#6
0
class PslTbl(list):
    """Table of PSL objects loaded from a tab-file.

    Rows come from PslReader and are stored in the list itself.  The qName
    index (self.qNameMap) and tName index (self.tNameMap) are MultiDicts built
    only when requested; otherwise they are None and the corresponding lookup
    methods must not be called.
    """

    def __mkQNameIdx(self):
        "index every row by qName in self.qNameMap"
        self.qNameMap = MultiDict()
        for psl in self:
            self.qNameMap.add(psl.qName, psl)

    def __mkTNameIdx(self):
        "index every row by tName in self.tNameMap"
        self.tNameMap = MultiDict()
        for psl in self:
            self.tNameMap.add(psl.tName, psl)

    def __init__(self, fileName, qNameIdx=False, tNameIdx=False):
        """Read every row of fileName, then build the indices selected by
        qNameIdx and tNameIdx."""
        for psl in PslReader(fileName):
            self.append(psl)
        self.qNameMap = self.tNameMap = None
        if qNameIdx:
            self.__mkQNameIdx()
        if tNameIdx:
            self.__mkTNameIdx()

    def getQNameIter(self):
        "iterator over the qName index keys"
        return self.qNameMap.iterkeys()

    def haveQName(self, qName):
        "True if the qName index contains qName"
        return self.qNameMap.get(qName) is not None

    def getByQName(self, qName):
        """generator to get all PSL with a given qName"""
        ent = self.qNameMap.get(qName)
        if ent is None:
            return
        # index entries are a single object or a list of objects
        if isinstance(ent, list):
            for psl in ent:
                yield psl
        else:
            yield ent

    def getTNameIter(self):
        "iterator over the tName index keys"
        return self.tNameMap.iterkeys()

    def haveTName(self, tName):
        "True if the tName index contains tName"
        # Fixed: this used to look up the undefined name `qName` and so
        # raised NameError on every call.
        return self.tNameMap.get(tName) is not None

    def getByTName(self, tName):
        """generator to get all PSL with a given tName"""
        ent = self.tNameMap.get(tName)
        if ent is None:
            return
        # index entries are a single object or a list of objects
        if isinstance(ent, list):
            for psl in ent:
                yield psl
        else:
            yield ent
示例#7
0
class BedTbl(TabFile):
    """Table of BED objects loaded from a tab-file.

    self.nameMap is a MultiDict of the rows keyed by name when the table is
    created with nameIdx=True, otherwise None.
    """

    def _mkNameIdx(self):
        "build self.nameMap, indexing every row by its name"
        self.nameMap = MultiDict()
        for bed in self:
            self.nameMap.add(bed.name, bed)

    def __init__(self, fileName, nameIdx=False):
        """Load fileName through TabFile with Bed as the row class and
        optionally build the name index."""
        TabFile.__init__(self, fileName, rowClass=Bed, hashAreComments=True)
        self.nameMap = None
        if nameIdx:
            # Fixed: this called self._nameIdx(), which this class does not
            # define; the index builder is _mkNameIdx.
            self._mkNameIdx()
示例#8
0
class BedTbl(TabFile):
    """Table of BED objects loaded from a tab-file.

    When constructed with nameIdx=True, self.nameMap is a MultiDict mapping
    each row's name to the row(s); otherwise it is None.
    """

    def _mkNameIdx(self):
        "index every row by name in self.nameMap"
        self.nameMap = MultiDict()
        for bed in self:
            self.nameMap.add(bed.name, bed)

    def __init__(self, fileName, nameIdx=False):
        """Initialise the TabFile base with Bed rows read from fileName, then
        build the name index if nameIdx is true."""
        TabFile.__init__(self, fileName, rowClass=Bed, hashAreComments=True)
        self.nameMap = None
        if nameIdx:
            # Fixed: the call used to be self._nameIdx(), a method this class
            # never defines; _mkNameIdx is the builder defined above.
            self._mkNameIdx()
示例#9
0
 def __init__(self, clusterGenesOut):
     """Read clusterGenesOut with TSVReader, keep its column list, and pass
     every row to __addGene."""
     self.genes = MultiDict()
     reader = TSVReader(clusterGenesOut, typeMap=typeMap)
     self.columns = reader.columns
     self.tableSet = set()
     for row in reader:
         self.__addGene(row)
示例#10
0
文件: Taxon.py 项目: Moxikai/pubMunch
 def _loadNodes(self, dmpDir):
     """Parse dmpDir/nodes.dmp: store each Node in self.nodes under its taxId
     and add it to self.parentNodeRefs under its parentTaxId."""
     self.nodes = dict()
     self.parentNodeRefs = MultiDict()
     parser = dmpFileParser(dmpDir + "/nodes.dmp")
     for rec in parser:
         nd = Node(rec)
         self.nodes[nd.taxId] = nd
         self.parentNodeRefs.add(nd.parentTaxId, nd)
示例#11
0
class ClusterGenes(list):
    """Object to access output of ClusterGenes.  List of Cluster objects,
    indexed by clusterId.  Note that clusterId is one based, entry 0 is
    None, however generator doesn't return it or other Null clusters.

    Other attributes set by the constructor:
      genes    - MultiDict of rows keyed by their gene field
      columns  - the columns attribute of the TSVReader used to load the file
      tableSet - set of the table field values seen in the rows
    """

    def __init__(self, clusterGenesOut):
        "load all rows of clusterGenesOut, grouping them into clusters"
        self.genes = MultiDict()
        tsv = TSVReader(clusterGenesOut, typeMap=typeMap)
        self.columns = tsv.columns
        self.tableSet = set()
        for gene in tsv:
            self.__addGene(gene)

    def haveCluster(self, clusterId):
        " determine if the specified cluster exists"
        # Negative ids are rejected explicitly; otherwise list indexing would
        # wrap around and report a cluster from the end of the list.
        if clusterId < 0 or clusterId >= len(self):
            return False
        return self[clusterId] is not None

    def __getCluster(self, clusterId):
        "get the cluster for clusterId, creating it (and padding the list) if needed"
        # pad with None so that clusterId is a valid index
        while len(self) <= clusterId:
            self.append(None)
        if self[clusterId] is None:
            self[clusterId] = Cluster(clusterId)
        return self[clusterId]

    def __addGene(self, row):
        "add a row to its cluster and to the genes and tableSet indices"
        cluster = self.__getCluster(row.cluster)
        cluster.add(row)
        self.genes.add(row.gene, row)
        self.tableSet.add(row.table)

    def __iter__(self):
        "get generator over non-null clusters"
        return self.generator()

    def generator(self):
        "generator over non-null clusters"
        # list.__iter__ is used directly because __iter__ is overridden above
        for cl in list.__iter__(self):
            if cl is not None:
                yield cl
示例#12
0
    def _loadNames(self, dmpDir):
        """Fill self.names and self.sciNames from dmpDir/names.dmp.

        Each row becomes a Name stored in self.names under its taxId; rows
        whose nameClass is "scientific name" also map nameTxt -> taxId in
        self.sciNames.
        """
        self.names = MultiDict()
        self.sciNames = dict()
        for fields in dmpFileParser(dmpDir + "/names.dmp"):
            entry = Name(fields)
            # NOTE(review): plain item assignment on a MultiDict -- confirm
            # that add() was not intended here.
            self.names[entry.taxId] = entry
            if entry.nameClass == "scientific name":
                self.sciNames[entry.nameTxt] = entry.taxId
示例#13
0
class GenePredTbl(list):
    """Rows read by GenePredReader from a file, held as a list with optional indices."""

    def __init__(self,
                 fileName,
                 buildIdx=False,
                 buildUniqIdx=False,
                 buildRangeIdx=False):
        """Read all rows of fileName, then build whichever indices were asked for.

        self.names is filled by buildUniqIdx or buildIdx (only one of the two
        may be given) and self.rangeMap by buildRangeIdx; each stays None when
        its index was not requested.
        """
        if buildIdx and buildUniqIdx:
            raise Exception("can't specify both buildIdx and buildUniqIdx")
        for rec in GenePredReader(fileName):
            self.append(rec)
        self.names, self.rangeMap = None, None
        if buildUniqIdx:
            self.__buildUniqIdx()
        if buildIdx:
            self.__buildIdx()
        if buildRangeIdx:
            self.__buildRangeIdx()

    def __buildUniqIdx(self):
        """Build a dict from name to row; a repeated name raises Exception."""
        seen = self.names = dict()
        for rec in self:
            recName = rec.name
            if recName in seen:
                raise Exception("gene with this name already in index: " +
                                recName)
            seen[recName] = rec

    def __buildIdx(self):
        """Build a MultiDict (imported on demand) from name to row(s)."""
        from pycbio.sys.MultiDict import MultiDict
        multi = MultiDict()
        self.names = multi
        for rec in self:
            multi.add(rec.name, rec)

    def __buildRangeIdx(self):
        """Add every row to a RangeFinder (imported on demand) using its chrom,
        txStart, txEnd and strand."""
        from pycbio.hgdata.RangeFinder import RangeFinder
        ranges = RangeFinder()
        self.rangeMap = ranges
        for rec in self:
            ranges.add(rec.chrom, rec.txStart, rec.txEnd, rec,
                       rec.strand)
示例#14
0
 def _mkNameIdx(self):
     "build self.nameMap, a MultiDict of the items of self keyed by their name"
     byName = self.nameMap = MultiDict()
     for entry in self:
         byName.add(entry.name, entry)
示例#15
0
 def __buildIdx(self):
     "index every item of self under its name in a MultiDict stored as self.names"
     from pycbio.sys.MultiDict import MultiDict
     byName = self.names = MultiDict()
     for entry in self:
         byName.add(entry.name, entry)
示例#16
0
 def __mkTNameIdx(self):
     "build self.tNameMap, a MultiDict of the items of self keyed by their tName"
     byTName = self.tNameMap = MultiDict()
     for aln in self:
         byTName.add(aln.tName, aln)
示例#17
0
 def __mkTNameIdx(self):
     "index each item of self by tName in a new MultiDict kept as self.tNameMap"
     index = MultiDict()
     self.tNameMap = index
     for rec in self:
         index.add(rec.tName, rec)
示例#18
0
 def __buildIdx(self):
     "fill self.names, a MultiDict (imported on demand), with the items of self keyed by name"
     from pycbio.sys.MultiDict import MultiDict
     index = MultiDict()
     self.names = index
     for rec in self:
         index.add(rec.name, rec)
示例#19
0
 def _mkNameIdx(self):
     "index each item of self by name in a new MultiDict kept as self.nameMap"
     index = MultiDict()
     self.nameMap = index
     for rec in self:
         index.add(rec.name, rec)