def __iter__(self):
    """
    Yield regions chromosome by chromosome, based on the bounding-region
    shelves of the two tracks.

    For every chromosome in the extended chromosome list of the genome:

    * no shelve for track 1: one region covering the chromosome from 0 to
      its length is yielded;
    * no shelve for track 2, or track 2 consists of whole-chromosome
      bounding regions only while track 1 does not: the bounding regions
      of track 1 are yielded;
    * track 1 consists of whole-chromosome bounding regions only while
      track 2 does not: the bounding regions of track 2 are yielded;
    * otherwise: whatever self.getAllIntersectingRegions() returns for the
      two bounding-region lists is yielded.
    """
    shelveA = self._getBoundingRegionShelve(self._trackName1)
    shelveB = self._getBoundingRegionShelve(self._trackName2)

    # A missing shelve never counts as "whole chromosomes only".
    wholeChrsOnlyA = shelveA is not None and \
        self._commonAllBoundingRegionsAreWholeChr(shelveA)
    wholeChrsOnlyB = shelveB is not None and \
        self._commonAllBoundingRegionsAreWholeChr(shelveB)

    for chrom in GenomeInfo.getExtendedChrList(self.genome):
        if shelveA is None:
            yield GenomeRegion(self.genome, chrom, 0, GenomeInfo.getChrLen(self.genome, chrom))
            continue

        regionsA = shelveA.getAllBoundingRegionsForChr(chrom)

        if shelveB is None or (wholeChrsOnlyB and not wholeChrsOnlyA):
            selected = regionsA
        else:
            regionsB = shelveB.getAllBoundingRegionsForChr(chrom)
            if wholeChrsOnlyA and not wholeChrsOnlyB:
                selected = regionsB
            else:
                selected = self.getAllIntersectingRegions(self.genome, chrom, regionsA, regionsB)

        for region in selected:
            yield region
示例#2
0
 def getTotalBpSpan(self):
     """
     Return the number of base pairs spanned.

     When self.chr is set, this is len(self). When self.chr is None, it is
     the sum of the lengths of all chromosomes in the extended chromosome
     list of self.genome.
     """
     if self.chr is not None:
         return len(self)

     totalLen = 0
     for chrom in GenomeInfo.getExtendedChrList(self.genome):
         totalLen += GenomeInfo.getChrLen(self.genome, chrom)
     return totalLen
 def getAllBoundingRegions(self):
     """
     Generate every bounding region of the track, chromosome by chromosome,
     following the order of the extended chromosome list of the genome.

     Raises BoundingRegionsNotAvailableError if self.fileExists() is false.
     As this is a generator, the check runs when iteration starts.
     """
     if not self.fileExists():
         from gtrackcore_memmap.util.CommonFunctions import prettyPrintTrackName
         message = 'Bounding regions not available for track: ' + \
             prettyPrintTrackName(self._trackName)
         raise BoundingRegionsNotAvailableError(message)

     for chrom in GenomeInfo.getExtendedChrList(self._genome):
         for region in self.getAllBoundingRegionsForChr(chrom):
             yield region
 def _removeBoundingRegionTuplesIfFullChrsAndNotFixedGapSize(self):
     """
     Clear self._boundingRegionTuples when they carry no information beyond
     "every listed chromosome is covered in full".

     Nothing is done if the fixed gap size is non-zero or the representation
     is dense. Otherwise the tuples are replaced by an empty list, provided
     that every tuple's region lies on a chromosome in the extended
     chromosome list, starts at 0 and ends at the chromosome length.
     """
     if self.getFixedGapSize() != 0 or self._reprIsDense:
         return

     for brTuple in self._boundingRegionTuples:
         if brTuple.region.chr not in GenomeInfo.getExtendedChrList(self._genome):
             return
         if brTuple.region.start != 0:
             return
         if brTuple.region.end != GenomeInfo.getChrLen(self._genome, brTuple.region.chr):
             return

     # Only full chromosomes were found (or there were no tuples at all)
     self._boundingRegionTuples = []
示例#5
0
def parseRegSpec(regSpec, genome = None, includeExtraChrs = False):
    """
    Parse a comma-separated region specification into GenomeRegion objects.

    Each item has the form '[genome:]chr[:start-end]' or '[genome:]*'.

    * '*' must be the only item and expands to one full-length region per
      chromosome of the genome.
    * 'chr' alone denotes the full chromosome.
    * 'chr:start-end' uses 1-indexed positions; the start is decremented by
      one to obtain 0-indexed, end-exclusive coordinates. An empty end
      ('chr:start-') means the chromosome length. The letters 'k' and 'm'
      are expanded textually: to '001'/'000001' in the start and to
      '000'/'000000' in the end, so that e.g. '10k-20k' becomes 10001-20000.

    Arguments:
    regSpec          -- the region specification string
    genome           -- genome name; if None, the first colon-separated
                        part of the first item is used as the genome
    includeExtraChrs -- for '*', whether to use the extended chromosome
                        list instead of the standard chromosome list

    Raises:
    AssertionError     -- on a malformed specification (checked via assert)
    InvalidFormatError -- if the chromosome length lookup fails
    """
    from gtrackcore_memmap.track.core.GenomeRegion import GenomeRegion
    from gtrackcore_memmap.metadata.GenomeInfo import GenomeInfo

    class SimpleUserBinSource(list):
        pass

    regions = []
    allRegSpecs = regSpec.strip().split(',')
    for curRegSpec in allRegSpecs:
        regParts = curRegSpec.strip().split(':')
        if genome is None:
            genome = regParts[0]

        # Anything that is neither '*' nor a known chromosome must be the
        # genome name prefix, which is then stripped off.
        if not (regParts[0]=='*' or regParts[0] in GenomeInfo.getExtendedChrList(genome)):
            assert regParts[0] == genome, \
                "Region specification does not start with one of '*' or correct chromosome or genome name. Region specification: %s. Genome: %s" % (curRegSpec, genome)
            regParts = regParts[1:]

        if regParts[0] == '*':
            assert len(regParts) == 1, \
                "Region specification starts with '*' but continues with ':'. Region specification: %s" % curRegSpec
            assert len(allRegSpecs) == 1, \
                "Region specification is '*', but is in a list with other region specifications: %s" % regSpec
            chrList = GenomeInfo.getExtendedChrList(genome) if includeExtraChrs else GenomeInfo.getChrList(genome)
            for chr in chrList:
                regions.append(GenomeRegion(genome, chr, 0, GenomeInfo.getChrLen(genome, chr)))
        else:
            assert regParts[0] in GenomeInfo.getExtendedChrList(genome), \
                "Region specification does not start with chromosome specification. Region specification: %s " % curRegSpec
            chr = regParts[0]
            try:
                chrLen = GenomeInfo.getChrLen(genome, chr)
            except Exception:
                # Any lookup failure is reported as an invalid specification
                raise InvalidFormatError("Chromosome '%s' does not exist for genome '%s'" % (chr, genome))

            if len(regParts)>1:
                posParts = regParts[1]
                assert '-' in posParts, \
                    "Position specification does not include character '-'. Region specification: %s " % curRegSpec
                rawStart, rawEnd = posParts.split('-')

                # 'k'/'m' expand differently for start and end so that the
                # 1-indexed start lands on the first position after the
                # round number, e.g. '10k-20k' -> 10001-20000.
                start = int(rawStart.replace('k','001').replace('m','000001'))
                end = int(rawEnd.replace('k','000').replace('m','000000')) if rawEnd != '' else chrLen
                assert start >= 1, \
                    "Start position is not positive. Region specification: %s " % curRegSpec
                assert end >= start, \
                    "End position is not larger than start position. Region specification: %s " % curRegSpec
                assert end <= chrLen, \
                    "End position is larger than chromosome size. Genome: %s. Chromosome size: %s. Region specification: %s" % (genome, chrLen, curRegSpec)
                #-1 for conversion from 1-indexing to 0-indexing end-exclusive
                start-=1

            else:
                start,end = 0, chrLen
            regions.append( GenomeRegion(genome, chr, start, end) )
    # NOTE(review): the visible code ends here without returning `regions`,
    # and SimpleUserBinSource is never used -- presumably the result is
    # wrapped and returned in lines not shown here; confirm.
示例#6
0
 def _isOldTypeChromDirectory(dirPath, genome):
     """
     Tell whether dirPath is a directory that is named after a chromosome
     of the genome and contains no subdirectories.

     A path ending with the path separator is first reduced with
     os.path.dirname(), so that the directory name itself is inspected.
     """
     path = os.path.dirname(dirPath) if dirPath[-1] == os.sep else dirPath

     if os.path.basename(path) not in set(GenomeInfo.getExtendedChrList(genome)):
         return False

     for entry in os.listdir(path):
         if os.path.isdir(os.path.join(path, entry)):
             return False
     return True
 def _commonAllBoundingRegionsAreWholeChr(self, brShelve):
     """
     Return True if every bounding region in brShelve, over all chromosomes
     in the extended chromosome list of self.genome, reports isWholeChr().
     Returns True as well when there are no bounding regions at all.
     """
     return all(
         region.isWholeChr()
         for chrom in GenomeInfo.getExtendedChrList(self.genome)
         for region in brShelve.getAllBoundingRegionsForChr(chrom)
     )
 def getTotalElementCount(self):
     """
     Return the sum of self.getTotalElementCountForChr() over all
     chromosomes in the extended chromosome list of the genome.
     """
     total = 0
     for chrom in GenomeInfo.getExtendedChrList(self._genome):
         total += self.getTotalElementCountForChr(chrom)
     return total