class GenePredTbl(list):
    """List of the rows produced by GenePredReader for a tab-file.

    Optional indices are built at construction time when requested:
      names    - rows keyed by row.name; a plain dict (one row per name)
                 for buildUniqIdx, a MultiDict for buildIdx.
      rangeMap - RangeFinder filled from each row's chrom, txStart, txEnd
                 and strand, for buildRangeIdx.
    An index that was not requested is left as None.
    """

    def __init__(self, fileName, buildIdx=False, buildUniqIdx=False, buildRangeIdx=False):
        # both name indices are stored in self.names, so they are exclusive
        if buildIdx and buildUniqIdx:
            raise Exception("can't specify both buildIdx and buildUniqIdx")
        for gene in GenePredReader(fileName):
            self.append(gene)
        self.names = None
        self.rangeMap = None
        if buildUniqIdx:
            self.__buildUniqIdx()
        if buildIdx:
            self.__buildIdx()
        if buildRangeIdx:
            self.__buildRangeIdx()

    def __buildUniqIdx(self):
        """Index rows by name in a dict; a repeated name is an error."""
        byName = self.names = dict()
        for gene in self:
            if gene.name in byName:
                raise Exception("gene with this name already in index: " + gene.name)
            byName[gene.name] = gene

    def __buildIdx(self):
        """Index rows by name in a MultiDict, allowing repeated names."""
        from pycbio.sys.multiDict import MultiDict
        byName = self.names = MultiDict()
        for gene in self:
            byName.add(gene.name, gene)

    def __buildRangeIdx(self):
        """Add every row to a RangeFinder under its transcript range."""
        from pycbio.hgdata.RangeFinder import RangeFinder
        finder = self.rangeMap = RangeFinder()
        for row in self:
            finder.add(row.chrom, row.txStart, row.txEnd, row, row.strand)
class PslTbl(list):
    """Table of PSL objects loaded from a tab-file.

    The table is a list of the records yielded by PslReader.  Two optional
    indices can be requested at construction time:
      qNameMap - MultiDict of records keyed by psl.qName (qNameIdx=True)
      tNameMap - MultiDict of records keyed by psl.tName (tNameIdx=True)
    An index that was not requested is None, and the accessor methods for
    that index must not be called.
    """

    def __mkQNameIdx(self):
        """Build the qName -> records index."""
        self.qNameMap = MultiDict()
        for psl in self:
            self.qNameMap.add(psl.qName, psl)

    def __mkTNameIdx(self):
        """Build the tName -> records index."""
        self.tNameMap = MultiDict()
        for psl in self:
            self.tNameMap.add(psl.tName, psl)

    def __init__(self, fileName, qNameIdx=False, tNameIdx=False):
        """Load all records from fileName and build the requested indices."""
        for psl in PslReader(fileName):
            self.append(psl)
        self.qNameMap = self.tNameMap = None
        if qNameIdx:
            self.__mkQNameIdx()
        if tNameIdx:
            self.__mkTNameIdx()

    def getQNameIter(self):
        """Return an iterator over the query names in the qName index."""
        # NOTE(review): iterkeys() only exists on Python 2 dict-like
        # objects; confirm against MultiDict before running on Python 3.
        return self.qNameMap.iterkeys()

    def haveQName(self, qName):
        """Return True if the qName index has an entry for qName."""
        return (self.qNameMap.get(qName) is not None)

    def getByQName(self, qName):
        """generator to get all PSL with a given qName"""
        ent = self.qNameMap.get(qName)
        if ent is not None:
            # an entry may be a list of records or a single record
            if isinstance(ent, list):
                for psl in ent:
                    yield psl
            else:
                yield ent

    def getTNameIter(self):
        """Return an iterator over the target names in the tName index."""
        # NOTE(review): see getQNameIter regarding iterkeys().
        return self.tNameMap.iterkeys()

    def haveTName(self, tName):
        """Return True if the tName index has an entry for tName."""
        # previously looked up the undefined name qName, raising NameError
        return (self.tNameMap.get(tName) is not None)

    def getByTName(self, tName):
        """generator to get all PSL with a given tName"""
        ent = self.tNameMap.get(tName)
        if ent is not None:
            # an entry may be a list of records or a single record
            if isinstance(ent, list):
                for psl in ent:
                    yield psl
            else:
                yield ent
class BedTbl(TabFile):
    """Table of BED objects loaded from a tab-file.

    Rows are read by TabFile using Bed as the row class.  When nameIdx is
    true, nameMap is a MultiDict of rows keyed by bed.name; otherwise
    nameMap is None.
    """

    def __init__(self, fileName, nameIdx=False):
        TabFile.__init__(self, fileName, rowClass=Bed, hashAreComments=True)
        self.nameMap = None
        if nameIdx:
            self.__mkNameIdx()

    def __mkNameIdx(self):
        """Build the name -> rows index."""
        byName = self.nameMap = MultiDict()
        for rec in self:
            byName.add(rec.name, rec)
def __init__(self, clusterGenesOut):
    """Read the clusterGenes output file and record every row.

    Sets up genes (MultiDict), columns (copied from the TSV reader) and
    tableSet (set) before passing each row to __addGene.
    """
    self.genes = MultiDict()
    reader = TsvReader(clusterGenesOut, typeMap=typeMap)
    self.columns = reader.columns
    self.tableSet = set()
    for row in reader:
        self.__addGene(row)
class ClusterGenes(list):
    """Access to the output of ClusterGenes.

    The object is a list of Cluster objects indexed by clusterId.  Cluster
    ids are one based, so entry 0 is None; ids with no rows are also None.
    Iterating the object skips all of the None entries.

    Other attributes:
      genes    - MultiDict of rows keyed by row.gene
      columns  - columns attribute of the TSV reader
      tableSet - set of row.table values seen
    """

    def __init__(self, clusterGenesOut):
        self.genes = MultiDict()
        reader = TsvReader(clusterGenesOut, typeMap=typeMap)
        self.columns = reader.columns
        self.tableSet = set()
        for row in reader:
            self.__addGene(row)

    def haveCluster(self, clusterId):
        "determine if the specified cluster exists"
        if clusterId < len(self):
            return self[clusterId] is not None
        return False

    def __getCluster(self, clusterId):
        "get the cluster for an id, growing the list and creating it as needed"
        shortfall = clusterId + 1 - len(self)
        if shortfall > 0:
            # pad with None so that clusterId becomes a valid index
            self.extend([None] * shortfall)
        cluster = self[clusterId]
        if cluster is None:
            cluster = self[clusterId] = Cluster(clusterId)
        return cluster

    def __addGene(self, row):
        "add a row to its cluster and to the gene and table indices"
        self.__getCluster(row.cluster).add(row)
        self.genes.add(row.gene, row)
        self.tableSet.add(row.table)

    def __iter__(self):
        "get generator over non-null clusters"
        return self.generator()

    def generator(self):
        "generator over non-null clusters"
        # use list's own iterator; __iter__ on this class calls back here
        for entry in list.__iter__(self):
            if entry is None:
                continue
            yield entry
def __buildIdx(self):
    """Index rows by name in a MultiDict stored as self.names."""
    from pycbio.sys.multiDict import MultiDict
    byName = self.names = MultiDict()
    for gene in self:
        byName.add(gene.name, gene)
def __mkTNameIdx(self):
    """Index records by tName in a MultiDict stored as self.tNameMap."""
    byTName = self.tNameMap = MultiDict()
    for rec in self:
        byTName.add(rec.tName, rec)
def __mkNameIdx(self):
    """Index rows by name in a MultiDict stored as self.nameMap."""
    byName = self.nameMap = MultiDict()
    for rec in self:
        byName.add(rec.name, rec)