示例#1
0
def parseRegSpec(regSpec, genome = None, includeExtraChrs = False):
    """Parse a textual region specification into GenomeRegion objects.

    ``regSpec`` is a comma-separated list of items. Each item is split on
    ':' and has one of the forms::

        [genome:]*                  every chromosome of the genome
        [genome:]chr                the whole chromosome
        [genome:]chr:start-end      a sub-region of the chromosome
        [genome:]chr:start-         from start to the end of the chromosome

    Positions in the specification are 1-indexed with an inclusive end; they
    are converted to 0-indexed, end-exclusive coordinates. The letters 'k'
    and 'm' in a position are expanded textually: in the start position to
    '001' / '000001', in the end position to '000' / '000000' (so '10k-20k'
    becomes positions 10001 to 20000).

    Parameters:
        regSpec: The region specification string.
        genome: Genome name. If None, the first ':'-separated part of the
            first item is used as the genome name.
        includeExtraChrs: Only used for the '*' form. If true, the
            chromosomes come from GenomeInfo.getExtendedChrList, otherwise
            from GenomeInfo.getChrList.

    Raises:
        AssertionError: If an item is malformed (unknown leading part, '*'
            combined with other parts or other items, unknown chromosome,
            missing '-', or start/end outside 1 <= start <= end <= chrLen).
        InvalidFormatError: If the chromosome length lookup fails.
        ValueError: If a position is not an integer after 'k'/'m' expansion,
            or the position part contains more than one '-'.
        IndexError: If an item consists of the genome name only.

    NOTE(review): the visible code builds ``regions`` but never returns it,
    and ``SimpleUserBinSource`` is never used. The tail of this function
    looks truncated -- confirm against the original source before relying on
    the return value.
    """
    from gtrackcore_memmap.track.core.GenomeRegion import GenomeRegion
    from gtrackcore_memmap.metadata.GenomeInfo import GenomeInfo

    # NOTE(review): unused in the visible code; presumably meant to wrap
    # `regions` in the missing tail of the function.
    class SimpleUserBinSource(list):
        pass

    regions = []
    allRegSpecs = regSpec.strip().split(',')
    for curRegSpec in allRegSpecs:
        regParts = curRegSpec.strip().split(':')
        if genome is None:
            genome = regParts[0]

        # A leading part that is neither '*' nor a known chromosome must be
        # the genome name; strip it so regParts starts at '*' or the chromosome.
        if not (regParts[0] == '*' or regParts[0] in GenomeInfo.getExtendedChrList(genome)):
            assert regParts[0] == genome, \
                "Region specification does not start with one of '*' or correct chromosome or genome name. Region specification: %s. Genome: %s" % (curRegSpec, genome)
            regParts = regParts[1:]

        if regParts[0] == '*':
            assert len(regParts) == 1, \
                "Region specification starts with '*' but continues with ':'. Region specification: %s" % curRegSpec
            assert len(allRegSpecs) == 1, \
                "Region specification is '*', but is in a list with other region specifications: %s" % regSpec
            chrList = GenomeInfo.getExtendedChrList(genome) if includeExtraChrs else GenomeInfo.getChrList(genome)
            for chrom in chrList:
                regions.append(GenomeRegion(genome, chrom, 0, GenomeInfo.getChrLen(genome, chrom)))
        else:
            assert regParts[0] in GenomeInfo.getExtendedChrList(genome), \
                "Region specification does not start with chromosome specification. Region specification: %s " % curRegSpec
            chrom = regParts[0]
            try:
                chrLen = GenomeInfo.getChrLen(genome, chrom)
            except Exception:
                # InvalidFormatError is not imported in this block; it is
                # expected to be provided by the enclosing module.
                raise InvalidFormatError("Chromosome '%s' does not exist for genome '%s'" % (chrom, genome))

            if len(regParts) > 1:
                posParts = regParts[1]
                assert '-' in posParts, \
                    "Position specification does not include character '-'. Region specification: %s " % curRegSpec
                rawStart, rawEnd = posParts.split('-')

                # 'k'/'m' expand differently for start and end so that e.g.
                # '10k-20k' denotes positions 10001..20000.
                start = int(rawStart.replace('k', '001').replace('m', '000001'))
                # An empty end position means "to the end of the chromosome".
                end = int(rawEnd.replace('k', '000').replace('m', '000000')) if rawEnd != '' else chrLen
                assert start >= 1, \
                    "Start position is not positive. Region specification: %s " % curRegSpec
                assert end >= start, \
                    "End position is not larger than start position. Region specification: %s " % curRegSpec
                assert end <= chrLen, \
                    "End position is larger than chromosome size. Genome: %s. Chromosome size: %s. Region specification: %s" % (genome, chrLen, curRegSpec)
                # -1 for conversion from 1-indexing to 0-indexing end-exclusive
                start -= 1
            else:
                start, end = 0, chrLen
            regions.append(GenomeRegion(genome, chrom, start, end))
示例#2
0
 def __new__(cls, genome):
     """Return a one-element list holding a minimal region of `genome`.

     The region spans positions 0 to 1 on the first chromosome returned by
     GenomeInfo.getChrList(genome). If that list is empty, no value is
     returned explicitly, so the result is None.
     """
     from gtrackcore_memmap.track.core.GenomeRegion import GenomeRegion
     from gtrackcore_memmap.metadata.GenomeInfo import GenomeInfo
     chrList = GenomeInfo.getChrList(genome)
     if len(chrList) > 0:
         # Reuse the list fetched above instead of querying GenomeInfo again.
         return [GenomeRegion(genome, chrList[0], 0, 1)]