Пример #1
0
 def mergeFile( inFile, outFile="" ):
     """
     Merge the alignments of a file, one (query, subject) pair at a time.

     The input is first sorted into '<inFile>.sorted' with
     AlignUtils.sortAlignFile(). Consecutive lines sharing the same query
     and subject names are then grouped, each group is passed to
     AlignUtils.mergeList() and the result is appended to the output file.
     The temporary sorted file is removed at the end, even on error.

     @param inFile: name of the input file (one alignment per line)
     @param outFile: name of the output file (default: '<inFile>.merged');
                     it is removed first if it already exists
     """
     if outFile == "":
         outFile = "%s.merged" % ( inFile )
     if os.path.exists( outFile ):
         os.remove( outFile )

     tmpFile = "%s.sorted" % ( inFile )
     AlignUtils.sortAlignFile( inFile, tmpFile )

     try:
         currentPair = None
         lCurrentAligns = []
         with open( tmpFile, "r" ) as tmpF:
             for line in tmpF:
                 iAlign = Align()
                 iAlign.setFromString( line )
                 # a tuple key cannot collide the way a "query_subject" string
                 # does when the sequence names themselves contain underscores
                 pairQrySbj = ( iAlign.getQueryName(), iAlign.getSubjectName() )
                 if pairQrySbj != currentPair:
                     # the file is sorted, so a new pair means the previous
                     # group is complete and can be merged and written
                     if lCurrentAligns:
                         lMerged = AlignUtils.mergeList( lCurrentAligns )
                         AlignUtils.writeListInFile( lMerged, outFile, "a" )
                     currentPair = pairQrySbj
                     lCurrentAligns = []
                 lCurrentAligns.append( iAlign )

         # last group; writeListInFile() is called even when the input was
         # empty, exactly as before (presumably to create the output file)
         lMerged = []
         if lCurrentAligns:
             lMerged = AlignUtils.mergeList( lCurrentAligns )
         AlignUtils.writeListInFile( lMerged, outFile, "a" )
     finally:
         if os.path.exists( tmpFile ):
             os.remove( tmpFile )
Пример #2
0
 def getAlignListFromFile( inFile ):
     """
     Return the content of a file as a list of Align instances.

     One new Align instance is created per line and filled with
     setFromString().

     @param inFile: name of the input file (one alignment per line)
     @return: list of Align instances, in file order (empty for an empty file)
     """
     lAlignInstances = []
     # 'with' guarantees the handle is closed even if a line fails to parse
     with open( inFile, "r" ) as inFileHandler:
         for line in inFileHandler:
             a = Align()
             a.setFromString( line )
             lAlignInstances.append( a )
     return lAlignInstances
Пример #3
0
 def getScoreListFromFile( inFile ):
     """
     Return the 'score' attribute of every alignment of a file.

     A single Align instance is reused: it is reset and re-filled with
     setFromString() for each line.

     @param inFile: name of the input file (one alignment per line)
     @return: list of scores, in file order
     """
     lScores = []
     iAlign = Align()
     # 'with' guarantees the handle is closed even if a line fails to parse
     with open( inFile, "r" ) as inFileHandler:
         for line in inFileHandler:
             iAlign.reset()
             iAlign.setFromString( line )
             lScores.append( iAlign.score )
     return lScores
Пример #4
0
 def convertAlignFileIntoMapFileWithSubjectsOnQueries( alignFile, mapFile ):
     """
     Convert an align file into a map file.

     For each line, the object returned by
     Align.getSubjectAsMapOfQuery() is written into the output file.

     @param alignFile: name of the input file (one alignment per line)
     @param mapFile: name of the output file (overwritten)
     """
     iAlign = Align()
     # nested 'with' blocks close both handles even if a line fails to parse
     with open( alignFile, "r" ) as alignFileHandler:
         with open( mapFile, "w" ) as mapFileHandler:
             for line in alignFileHandler:
                 iAlign.setFromString( line )
                 iMapQ = iAlign.getSubjectAsMapOfQuery()
                 iMapQ.write( mapFileHandler )
Пример #5
0
 def convertAlignFileIntoPathFile( alignFile, pathFile ):
     """
     Convert an align file into a path file.

     Each output line is the 1-based rank of the alignment in the input
     file, a tabulation, then the alignment as given by Align.toString().

     @param alignFile: name of the input file (tab-separated, one alignment per line)
     @param pathFile: name of the output file (overwritten)
     """
     iAlign = Align()
     # nested 'with' blocks close both handles even if a line fails to parse
     with open( alignFile, "r" ) as alignFileHandler:
         with open( pathFile, "w" ) as pathFileHandler:
             # identifiers start at 1, hence the explicit start value
             for countAlign, line in enumerate( alignFileHandler, 1 ):
                 iAlign.setFromString( line, "\t" )
                 pathFileHandler.write( "%i\t%s\n" % ( countAlign, iAlign.toString() ) )
Пример #6
0
 def getAlignInstance(self):
     """
     Return a new Align instance filled from this object.

     The values are handed to Align.setFromTuple() in the order:
     query name, start, end, subject name, start, end, e-value, score,
     identity.
     """
     iAlign = Align()
     lAttributes = [
         self.range_query.seqname,
         self.range_query.start,
         self.range_query.end,
         self.range_subject.seqname,
         self.range_subject.start,
         self.range_subject.end,
         self.e_value,
         self.score,
         self.identity,
     ]
     iAlign.setFromTuple( lAttributes )
     return iAlign
Пример #7
0
 def convertAlignFileIntoMapFileWithQueriesAndSubjects( alignFile, mapFile ):
     """
     Convert an align file into a map file holding queries and subjects.

     For each line, the two objects returned by
     Align.getMapsOfQueryAndSubject() are written into the output file,
     the query one first.

     @param alignFile: name of the input file (one alignment per line)
     @param mapFile: name of the output file (overwritten)
     """
     iAlign = Align()
     # nested 'with' blocks close both handles even if a line fails to parse
     with open( alignFile, "r" ) as alignFileHandler:
         with open( mapFile, "w" ) as mapFileHandler:
             for line in alignFileHandler:
                 iAlign.setFromString( line )
                 iMapQ, iMapS = iAlign.getMapsOfQueryAndSubject()
                 iMapQ.write( mapFileHandler )
                 iMapS.write( mapFileHandler )
Пример #8
0
def filterRedundantMatches( inFile, outFile ):
    """
    Filter the redundant matches of an all-by-all pairwise alignment.

    When a pairwise alignment is launched ~ all-by-all (i.e. one batch against
    all chunks), the redundant matches are filtered out. For instance
    'chunk3-1-100-chunk7-11-110-...' is kept and
    'chunk7-11-110-chunk3-1-100-...' is discarded.
    Likewise 'chunk5-1-100-chunk5-11-110-...' is kept and
    'chunk5-11-110-chunk5-1-100-...' is discarded.
    For this, the results need to be sorted by query, on the plus strand,
    and in ascending coordinates (always the case with Blaster).

    The program exits with status 1 if a query or subject name does not
    contain 'chunk'.

    @param inFile: name of the input file (one alignment per line)
    @param outFile: name of the output file (overwritten)
    """
    iAlign = Align()
    countMatches = 0
    tick = 100000
    # nested 'with' blocks close both handles on every exit path,
    # including sys.exit() and parsing errors
    with open( inFile, "r" ) as inFileHandler:
        with open( outFile, "w" ) as outFileHandler:
            for line in inFileHandler:
                countMatches += 1
                iAlign.setFromString( line )
                qryName = iAlign.range_query.seqname
                sbjName = iAlign.range_subject.seqname
                if "chunk" not in qryName or "chunk" not in sbjName:
                    print( "ERROR: 'chunk' not in seqname" )
                    sys.exit(1)
                # the chunk number is whatever follows the first 'chunk'
                qryChunk = int( qryName.split("chunk")[1] )
                sbjChunk = int( sbjName.split("chunk")[1] )
                if qryChunk < sbjChunk:
                    iAlign.write( outFileHandler )
                elif qryChunk == sbjChunk:
                    # same chunk: keep only the match whose query lies first
                    if iAlign.range_query.getMin() < iAlign.range_subject.getMin():
                        iAlign.write( outFileHandler )
                if countMatches % tick == 0:   # need to free buffer frequently as file can be big
                    outFileHandler.flush()
                    os.fsync( outFileHandler.fileno() )
 def retrieveInitialSequenceHeadersForAlignFile( self, dNew2Init ):
     """
     Rewrite the align file, replacing sequence names via a dictionary.

     Each line of self._inFile is split on tabulations and loaded with
     Align.setFromTuple(). Query and subject names found as keys of
     'dNew2Init' are replaced by the associated value; other names are
     left unchanged. Every alignment is written into self._outFile.

     @param dNew2Init: mapping whose keys are the names to be replaced and
                       whose values are the replacement names
     """
     a = Align()
     # nested 'with' blocks close both handles even if a line fails to parse
     with open( self._inFile, "r" ) as inFileHandler:
         with open( self._outFile, "w" ) as outFileHandler:
             for line in inFileHandler:
                 a.setFromTuple( line.split("\t") )
                 nameToBeReplaced = a.range_query.seqname
                 if nameToBeReplaced in dNew2Init:
                     a.range_query.seqname = dNew2Init[ nameToBeReplaced ]
                 nameToBeReplaced = a.range_subject.seqname
                 if nameToBeReplaced in dNew2Init:
                     a.range_subject.seqname = dNew2Init[ nameToBeReplaced ]
                 a.write( outFileHandler )
Пример #10
0
 def updateScoresInFile( inFile, outFile ):
     """
     Copy an align file while updating the score of each alignment.

     A single Align instance is reused: for each line it is reset, filled
     with setFromString() using a tabulation as separator, updated with
     updateScore() and written into the output file.

     @param inFile: name of the input file (tab-separated, one alignment per line)
     @param outFile: name of the output file (overwritten)
     """
     iAlign = Align()
     # nested 'with' blocks close both handles even if a line fails to parse
     with open( inFile, "r" ) as inHandler:
         with open( outFile, "w" ) as outHandler:
             for line in inHandler:
                 iAlign.reset()
                 iAlign.setFromString( line, "\t" )
                 iAlign.updateScore()
                 iAlign.write( outHandler )
Пример #11
0
 def toString(self):
     """
     Return the identifier, a tabulation, then the Align.toString() form.
     """
     idField = "%i" % ( self.id )
     return idField + "\t%s" % ( Align.toString(self) )
Пример #12
0
 def reset(self):
     """
     Set the identifier back to -1, then delegate to Align.reset().
     """
     # -1 is also the default identifier used by the constructor in this file
     self.id = -1
     Align.reset(self)
Пример #13
0
 def setFromTuple(self, tuple):
     """
     Fill the instance from a sequence of fields.

     The first field is converted to int and stored as the identifier;
     the remaining fields are passed on to Align.setFromTuple().
     """
     self.id = int(tuple[0])
     lAlignFields = tuple[1:]
     Align.setFromTuple(self, lAlignFields)
Пример #14
0
 def __eq__(self, o):
     """
     Two instances are equal when their identifiers match and
     Align.__eq__() also considers them equal.
     """
     # differing identifiers settle the comparison without consulting Align
     if self.id != o.id:
         return False
     return Align.__eq__(self, o)
Пример #15
0
 def __init__( self, id=-1, range_q=None, range_s=None, e_value=0, score=0, identity=0 ):
     """
     Constructor.

     @param id: identifier, converted to int (default: -1)
     @param range_q: query range (default: a new Range instance)
     @param range_s: subject range (default: a new Range instance)
     @param e_value: passed unchanged to Align.__init__() (default: 0)
     @param score: passed unchanged to Align.__init__() (default: 0)
     @param identity: passed unchanged to Align.__init__() (default: 0)
     """
     # default arguments are evaluated only once, at definition time: using
     # Range() directly as a default would hand the very same Range objects
     # to every instance built without explicit ranges
     if range_q is None:
         range_q = Range()
     if range_s is None:
         range_s = Range()
     self.id = int( id )
     Align.__init__( self, range_q, range_s, e_value, score, identity )