def spliceFromCoords( genomeFile, coordFile, obsFile ):
    """
    Copy the sequences of a file while cutting out the regions listed in a map file.

    Each sequence is read from 'genomeFile' through Bioseq.read(). When its header
    is a key of the dictionary built from 'coordFile', every region described by
    the corresponding maps is removed from the sequence. Sequences without any
    map are written unchanged.

    @param genomeFile: name of the input sequence file
    @type genomeFile: string
    @param coordFile: name of the map file giving the regions to remove
    @type coordFile: string
    @param obsFile: name of the output file (overwritten if it exists)
    @type obsFile: string

    @note: map coordinates are handled as 1-based and inclusive: the bases kept
    before a map stop at index 'getMin() - 1', those kept after it start at
    index 'getMax()'.
    """
    # 'with' guarantees both files are closed even if reading or parsing fails
    with open( genomeFile, "r" ) as genomeFileHandler, open( obsFile, "w" ) as obsFileHandler:
        dChr2Maps = MapUtils.getDictPerSeqNameFromMapFile( coordFile )

        while True:
            bs = Bioseq()
            bs.read( genomeFileHandler )
            # the loop stops on the first record whose sequence is None
            # (presumably how Bioseq.read() signals the end of the file)
            if bs.sequence is None:
                break

            if bs.header in dChr2Maps:
                lCoords = MapUtils.getMapListSortedByIncreasingMinThenMax( dChr2Maps[ bs.header ] )
                lKeptParts = []
                currentSite = 0   # 0-based index of the first base not yet handled
                for iMap in lCoords:
                    minSplice = iMap.getMin() - 1
                    if minSplice > currentSite:
                        lKeptParts.append( bs.sequence[ currentSite : minSplice ] )
                    # never move backwards: a map nested in (or overlapping) a
                    # previous one must not re-expose bases already removed
                    currentSite = max( currentSite, iMap.getMax() )
                lKeptParts.append( bs.sequence[ currentSite : ] )
                bs.sequence = "".join( lKeptParts )

            bs.write( obsFileHandler )
def convertFastaHeadersFromChkToChr( inFile, mapFile, outFile ):
    """
    Rewrite the '{Fragment}' headers of a fasta file from chunk to chromosome coordinates.

    Lines that are not headers, and headers that do not contain '{Fragment}',
    are copied as they are. For the other headers, the space-separated fields
    are read as follows: the first one is the sequence name, the second one the
    chunk name, and the fourth one a comma-separated list of 'start..end'
    pairs on that chunk. The header is rewritten as
    '>name chromosome {Fragment} start..end[,start..end...]' with the
    coordinates converted by ConvCoord.getRangeOnChromosome().

    @param inFile: name of the input fasta file
    @type inFile: string
    @param mapFile: name of the map file describing the chunks
    @type mapFile: string
    @param outFile: name of the output fasta file (overwritten if it exists)
    @type outFile: string

    @raise KeyError: if a chunk name is absent from the map file
    (assuming the object built from 'mapFile' is a dictionary)
    """
    # 'with' guarantees both files are closed even when a header is malformed
    # or refers to an unknown chunk
    with open( inFile, "r" ) as inFileHandler, open( outFile, "w" ) as outFileHandler:
        dChunk2Map = MapUtils.getDictPerNameFromMapFile( mapFile )
        iConvCoord = ConvCoord()

        for line in inFileHandler:
            if line[0] != ">" or "{Fragment}" not in line:
                outFileHandler.write( line )
                continue

            lFields = line.split(" ")
            chkName = lFields[1]
            chrName = dChunk2Map[ chkName ].seqname

            # build every range on the chunk first, then convert them
            lRangesOnChk = []
            for coordPair in lFields[3].split(","):
                lBounds = coordPair.split("..")
                lRangesOnChk.append( Range( chkName, int(lBounds[0]), int(lBounds[1]) ) )
            lRangesOnChr = [ iConvCoord.getRangeOnChromosome( iRange, dChunk2Map ) for iRange in lRangesOnChk ]

            # line[1:-1] drops the leading '>' and the last character of the line
            seqName = line[1:-1].split(" ")[0]
            coordsOnChr = ",".join( "%i..%i" % ( iRange.start, iRange.end ) for iRange in lRangesOnChr )
            newHeader = "%s %s {Fragment} %s" % ( seqName, chrName, coordsOnChr )
            outFileHandler.write( ">%s\n" % ( newHeader ) )
def getSetListOverlappingCoord( self, seqName, start, end ):
    """
    Same query as getListOverlappingCoord(), with the result converted by
    MapUtils.mapList2SetList().

    @param seqName: forwarded unchanged to getListOverlappingCoord()
    @param start: forwarded unchanged to getListOverlappingCoord()
    @param end: forwarded unchanged to getListOverlappingCoord()
    @return: the value returned by MapUtils.mapList2SetList()
    """
    overlappingMaps = self.getListOverlappingCoord( seqName, start, end )
    return MapUtils.mapList2SetList( overlappingMaps )
def getSetListFromSeqName( self, seqName ):
    """
    Same query as getListFromSeqName(), with the result converted by
    MapUtils.mapList2SetList().

    @param seqName: forwarded unchanged to getListFromSeqName()
    @return: the value returned by MapUtils.mapList2SetList()
    """
    mapsForSeqName = self.getListFromSeqName( seqName )
    return MapUtils.mapList2SetList( mapsForSeqName )