Exemplo n.º 1
0
class GenePredTbl(list):
    """List of GenePred records read from a tab-file, with optional
    name and range indexes built at load time."""

    def __init__(self, fileName, buildIdx=False, buildUniqIdx=False, buildRangeIdx=False):
        """Load every record produced by GenePredReader(fileName).

        buildIdx -- index records by name in a MultiDict (self.names)
        buildUniqIdx -- index records by name in a dict (self.names),
            raising if a name occurs more than once
        buildRangeIdx -- index records in a RangeFinder (self.rangeMap)

        buildIdx and buildUniqIdx both populate self.names, so asking
        for both raises an Exception before anything is read.
        """
        if buildUniqIdx and buildIdx:
            raise Exception("can't specify both buildIdx and buildUniqIdx")
        self.extend(GenePredReader(fileName))
        # indexes stay None unless explicitly requested
        self.names = self.rangeMap = None
        if buildUniqIdx:
            self.__buildUniqIdx()
        elif buildIdx:
            self.__buildIdx()
        if buildRangeIdx:
            self.__buildRangeIdx()

    def __buildUniqIdx(self):
        """Fill self.names with a name -> record dict; a repeated name
        raises an Exception."""
        byName = self.names = dict()
        for gene in self:
            if gene.name in byName:
                raise Exception("gene with this name already in index: " + gene.name)
            byName[gene.name] = gene

    def __buildIdx(self):
        """Fill self.names with a MultiDict keyed by record name."""
        from pycbio.sys.multiDict import MultiDict
        byName = self.names = MultiDict()
        for gene in self:
            byName.add(gene.name, gene)

    def __buildRangeIdx(self):
        """Fill self.rangeMap with a RangeFinder holding each record
        under its chrom, txStart, txEnd and strand."""
        from pycbio.hgdata.RangeFinder import RangeFinder
        finder = self.rangeMap = RangeFinder()
        for gp in self:
            finder.add(gp.chrom, gp.txStart, gp.txEnd, gp, gp.strand)
Exemplo n.º 2
0
class PslTbl(list):
    """Table of PSL objects loaded from a tab-file.

    The table is a list of the records produced by PslReader.  Optional
    indexes by query name (qNameMap) and target name (tNameMap) are built
    on request; an index that was not requested is left as None, and the
    accessor methods for that index must not be used.
    """

    def __mkQNameIdx(self):
        "build qNameMap, a MultiDict of the records keyed by qName"
        self.qNameMap = MultiDict()
        for psl in self:
            self.qNameMap.add(psl.qName, psl)

    def __mkTNameIdx(self):
        "build tNameMap, a MultiDict of the records keyed by tName"
        self.tNameMap = MultiDict()
        for psl in self:
            self.tNameMap.add(psl.tName, psl)

    def __init__(self, fileName, qNameIdx=False, tNameIdx=False):
        """Load all records from fileName.

        qNameIdx -- if true, build the query-name index
        tNameIdx -- if true, build the target-name index
        """
        for psl in PslReader(fileName):
            self.append(psl)
        self.qNameMap = self.tNameMap = None
        if qNameIdx:
            self.__mkQNameIdx()
        if tNameIdx:
            self.__mkTNameIdx()

    def getQNameIter(self):
        "get an iterator over the query names in the qName index"
        # NOTE(review): iterkeys() is the Python 2 dict-style API; confirm
        # MultiDict still provides it before porting to Python 3.
        return self.qNameMap.iterkeys()

    def haveQName(self, qName):
        "determine if any PSL in the qName index has the given qName"
        return (self.qNameMap.get(qName) is not None)

    def getByQName(self, qName):
        """generator to get all PSL with a given qName"""
        ent = self.qNameMap.get(qName)
        if ent is not None:
            # an index entry may be a list of records or a single record
            if isinstance(ent, list):
                for psl in ent:
                    yield psl
            else:
                yield ent

    def getTNameIter(self):
        "get an iterator over the target names in the tName index"
        # NOTE(review): iterkeys() is the Python 2 dict-style API; confirm
        # MultiDict still provides it before porting to Python 3.
        return self.tNameMap.iterkeys()

    def haveTName(self, tName):
        "determine if any PSL in the tName index has the given tName"
        # look up the tName argument; this previously referenced the
        # undefined name qName and raised NameError on every call
        return (self.tNameMap.get(tName) is not None)

    def getByTName(self, tName):
        """generator to get all PSL with a given tName"""
        ent = self.tNameMap.get(tName)
        if ent is not None:
            # an index entry may be a list of records or a single record
            if isinstance(ent, list):
                for psl in ent:
                    yield psl
            else:
                yield ent
Exemplo n.º 3
0
class BedTbl(TabFile):
    """TabFile of Bed rows, optionally indexed by BED name."""

    def __init__(self, fileName, nameIdx=False):
        """Load fileName through TabFile with Bed as the row class.

        nameIdx -- if true, build self.nameMap, a MultiDict of the rows
        keyed by their name; otherwise self.nameMap is None.
        """
        # NOTE(review): hashAreComments presumably makes TabFile skip
        # '#' lines -- confirm against TabFile.
        TabFile.__init__(self, fileName, rowClass=Bed, hashAreComments=True)
        self.nameMap = None
        if nameIdx:
            self.__mkNameIdx()

    def __mkNameIdx(self):
        "build nameMap from the rows currently in the table"
        byName = self.nameMap = MultiDict()
        for rec in self:
            byName.add(rec.name, rec)
Exemplo n.º 4
0
 def __init__(self, clusterGenesOut):
     """Read the file clusterGenesOut with TsvReader and register every
     row via __addGene.

     Sets self.genes (a MultiDict), self.columns (the reader's columns)
     and self.tableSet (a set) before the rows are added.
     """
     self.genes = MultiDict()
     reader = TsvReader(clusterGenesOut, typeMap=typeMap)
     self.columns = reader.columns
     self.tableSet = set()
     for row in reader:
         self.__addGene(row)
0
class ClusterGenes(list):
    """Access to the output of ClusterGenes.  This is a list of Cluster
    objects where the list index is the clusterId.  Slots with no
    cluster hold None (the class was written for one-based ids, leaving
    entry 0 as None); iterating over the object skips the None slots.
    """
    def __init__(self, clusterGenesOut):
        """Read the file clusterGenesOut with TsvReader and add each row
        to its cluster, to self.genes (MultiDict keyed by row.gene) and
        to self.tableSet (set of row.table values)."""
        self.genes = MultiDict()
        reader = TsvReader(clusterGenesOut, typeMap=typeMap)
        self.columns = reader.columns
        self.tableSet = set()
        for row in reader:
            self.__addGene(row)

    def haveCluster(self, clusterId):
        "True if clusterId is within the list and its slot is not None"
        return clusterId < len(self) and self[clusterId] is not None

    def __getCluster(self, clusterId):
        "get the Cluster for clusterId, creating it (and padding the list) if needed"
        shortfall = clusterId + 1 - len(self)
        if shortfall > 0:
            # pad with empty slots up to and including clusterId
            self.extend([None] * shortfall)
        found = self[clusterId]
        if found is None:
            found = self[clusterId] = Cluster(clusterId)
        return found

    def __addGene(self, row):
        "record one row in its cluster, the gene index and the table set"
        self.__getCluster(row.cluster).add(row)
        self.genes.add(row.gene, row)
        self.tableSet.add(row.table)

    def __iter__(self):
        "iterate over the non-None clusters"
        return self.generator()

    def generator(self):
        "generator over the non-None clusters, in list order"
        return (slot for slot in list.__iter__(self) if slot is not None)
Exemplo n.º 6
0
class GenePredTbl(list):
    """Table of GenePred objects loaded from a tab-file.

    self.names and self.rangeMap hold the optional indexes; each is None
    unless the matching build flag was passed to the constructor.
    """
    def __init__(self, fileName, buildIdx=False, buildUniqIdx=False,
                 buildRangeIdx=False):
        """Read all records from fileName using GenePredReader.

        buildIdx requests a MultiDict name index, buildUniqIdx a dict
        name index that rejects duplicate names, and buildRangeIdx a
        RangeFinder index.  buildIdx and buildUniqIdx are mutually
        exclusive; passing both raises an Exception.
        """
        conflictingNameIdx = buildIdx and buildUniqIdx
        if conflictingNameIdx:
            raise Exception("can't specify both buildIdx and buildUniqIdx")
        for gene in GenePredReader(fileName):
            self.append(gene)
        self.names, self.rangeMap = None, None
        # at most one of the two name indexes can be requested here
        if buildUniqIdx:
            self.__buildUniqIdx()
        elif buildIdx:
            self.__buildIdx()
        if buildRangeIdx:
            self.__buildRangeIdx()

    def __buildUniqIdx(self):
        "index the records by name in a dict, raising on a repeated name"
        uniq = {}
        self.names = uniq
        for gene in self:
            geneName = gene.name
            if geneName in uniq:
                raise Exception("gene with this name already in index: " +
                                geneName)
            uniq[geneName] = gene

    def __buildIdx(self):
        "index the records by name in a MultiDict"
        from pycbio.sys.multiDict import MultiDict
        multi = MultiDict()
        self.names = multi
        for gene in self:
            multi.add(gene.name, gene)

    def __buildRangeIdx(self):
        "index the records in a RangeFinder by chrom, txStart, txEnd and strand"
        from pycbio.hgdata.RangeFinder import RangeFinder
        ranges = RangeFinder()
        self.rangeMap = ranges
        for rec in self:
            ranges.add(rec.chrom, rec.txStart, rec.txEnd, rec, rec.strand)
Exemplo n.º 7
0
 def __buildIdx(self):
     "set self.names to a MultiDict holding each row of self under row.name"
     from pycbio.sys.multiDict import MultiDict
     index = MultiDict()
     self.names = index
     for entry in self:
         index.add(entry.name, entry)
Exemplo n.º 8
0
 def __buildIdx(self):
     """Build the name index: a new MultiDict stored in self.names,
     with every item of self added under its name attribute."""
     from pycbio.sys.multiDict import MultiDict
     byName = self.names = MultiDict()
     for item in self:
         byName.add(item.name, item)
Exemplo n.º 9
0
 def __mkTNameIdx(self):
     "set self.tNameMap to a MultiDict holding each record of self under its tName"
     byTName = self.tNameMap = MultiDict()
     for rec in self:
         byTName.add(rec.tName, rec)
Exemplo n.º 10
0
 def __mkNameIdx(self):
     "set self.nameMap to a MultiDict holding each row of self under its name"
     byName = self.nameMap = MultiDict()
     for rec in self:
         byName.add(rec.name, rec)