示例#1
0
 def setFromTuple( self, tuple ):
     """Load both ranges and the match statistics from a sequence of fields.

     Expected layout of ``tuple`` (by index):
     0 query name, 1 query start, 2 query end,
     3 subject name, 4 subject start, 5 subject end,
     6 E-value, 7 score, 8 identity.

     When the first query coordinate is not strictly lower than the
     second one, the two query coordinates AND the two subject
     coordinates are swapped before being stored, so the query is always
     recorded with its smaller coordinate first.
     """
     # Brand-new Range objects are assigned instead of updating the current ones.
     # TODO: the __eq__() and isEmpty() tests need this, the reason is still unknown.
     self.range_query = Range()
     self.range_subject = Range()

     # Pick which field feeds "start" and which feeds "end" for each range.
     if int(tuple[1]) < int(tuple[2]):
         qryStartIdx, qryEndIdx, sbjStartIdx, sbjEndIdx = 1, 2, 4, 5
     else:
         qryStartIdx, qryEndIdx, sbjStartIdx, sbjEndIdx = 2, 1, 5, 4

     queryFields = ( tuple[0], tuple[qryStartIdx], tuple[qryEndIdx] )
     self.range_query.setFromTuple( queryFields )
     subjectFields = ( tuple[3], tuple[sbjStartIdx], tuple[sbjEndIdx] )
     self.range_subject.setFromTuple( subjectFields )

     # All three statistics are stored as floats.
     self.e_value = float(tuple[6])
     self.score = float(tuple[7])
     self.identity = float(tuple[8])
示例#2
0
 def diff(self, o):
     """Return a new Map built from the result of Range.diff().

     Range.diff() is called with this instance and ``o.getRange()``. If the
     range it returns is not empty, the new Map takes its name and seqname
     from this instance and its start and end from that range. Otherwise
     the new Map is returned exactly as the Map() constructor built it.

     @param o: an object providing getRange()
     @return: a new Map instance
     """
     remainder = Range.diff(self, o.getRange())
     result = Map()
     if remainder.isEmpty():
         # nothing to copy: hand back the default-constructed Map
         return result
     result.name = self.name
     result.seqname = self.seqname
     result.start = remainder.start
     result.end = remainder.end
     return result
示例#3
0
 def setFromTuple( self, tuple ):
     """Load every attribute of the match from a sequence of fields.

     Expected layout of ``tuple`` (by index):
     0 query name, 1 query start, 2 query end, 3 query length,
     4 query length percentage, 5 match length percentage,
     6 subject name, 7 subject start, 8 subject end, 9 subject length,
     10 subject length percentage, 11 E-value, 12 score, 13 identity, 14 id.

     When the query start is not strictly lower than the query end, the
     query coordinates and the subject coordinates are both swapped
     before the Range instances are built.
     """
     qryStart, qryEnd = int(tuple[1]), int(tuple[2])
     sbjStart, sbjEnd = int(tuple[7]), int(tuple[8])
     if qryStart >= qryEnd:
         # swap both pairs so that the query is stored smaller coordinate first
         qryStart, qryEnd = qryEnd, qryStart
         sbjStart, sbjEnd = sbjEnd, sbjStart
     self.range_query = Range(tuple[0], qryStart, qryEnd)
     self.range_subject = Range(tuple[6], sbjStart, sbjEnd)

     # Query side: the sequence length is derived as length / percentage.
     self.query_length = int(tuple[3])
     self.query_length_perc = float(tuple[4])
     self.query_seqlength = int(self.query_length / self.query_length_perc)

     self.match_length_perc = float(tuple[5])

     # Subject side: same derivation as for the query.
     self.subject_length = int(tuple[9])
     self.subject_length_perc = float(tuple[10])
     self.subject_seqlength = int(self.subject_length / self.subject_length_perc)

     self.e_value = float(tuple[11])
     self.score = float(tuple[12])
     self.identity = float(tuple[13])
     self.id = int(tuple[14])
示例#4
0
## Path extended with length statistics about the query and the subject
#
# On top of what Path.__init__() sets up, an instance records the length of
# the match on the query and on the subject, the corresponding fractions of
# the full sequences, and the full sequence lengths derived from them.
#
class Match( Path ):
    
    ## Constructor
    #
    # @note every length attribute starts at -1
    #
    def __init__(self):
        Path.__init__(self)
        self.query_length = -1
        self.query_length_perc = -1    # length of the match on the query / length of the query
        self.query_seqlength = -1
        self.match_length_perc = -1    # length of the match on the query / total length of the subject
        self.subject_length = -1
        self.subject_length_perc = -1    # length of the match on the subject / length of the subject
        self.subject_seqlength = -1
        
    ## Equal operator
    #
    # @param o a Match instance
    # @return False as soon as one length attribute differs, otherwise the result of Path.__eq__()
    #
    def __eq__(self, o):
        if ( self.query_length != o.query_length
             or self.query_length_perc != o.query_length_perc
             or self.query_seqlength != o.query_seqlength
             or self.subject_length != o.subject_length
             or self.subject_length_perc != o.subject_length_perc
             or self.subject_seqlength != o.subject_seqlength
             or self.match_length_perc != o.match_length_perc ):
            return False
        return Path.__eq__(self, o)
        
    ## Return the length of the match on the query divided by the total length of the query
    #
    def getLengthPercOnQuery(self):
        return self.query_length_perc
    
    ## Return the length of the match on the subject divided by the total length of the subject
    #
    def getLengthPercOnSubject(self):
        return self.subject_length_perc
    
    ## Return the length of the match on the subject
    #
    def getLengthMatchOnSubject(self):
        return self.subject_length
    
    ## Set attributes from a tuple
    # 
    # @param tuple: a tuple with (query name,query start,query end,
    #  query length, query length perc (between 0-1), match length perc (between 0-1), subject name,
    #  subject start,subject end,subject length, subject length percentage (between 0-1), e_value,score,identity,id)
    # @note when the query start is not strictly lower than the query end, the query
    #  coordinates and the subject coordinates are both swapped
    #
    def setFromTuple( self, tuple ):
        queryStart = int(tuple[1])
        queryEnd = int(tuple[2])
        subjectStart = int(tuple[7])
        subjectEnd = int(tuple[8])
        if queryStart >= queryEnd:
            queryStart, queryEnd = queryEnd, queryStart
            subjectStart, subjectEnd = subjectEnd, subjectStart
        self.range_query = Range(tuple[0],queryStart,queryEnd)
        self.range_subject = Range(tuple[6],subjectStart,subjectEnd)
        self.query_length = int(tuple[3])
        self.query_length_perc = float(tuple[4])
        # NOTE(review): a percentage equal to 0 raises ZeroDivisionError here and
        # for the subject below -- confirm the input can never contain 0
        self.query_seqlength = int( self.query_length / self.query_length_perc )
        self.match_length_perc = float(tuple[5])
        self.subject_length = int(tuple[9])
        self.subject_length_perc = float(tuple[10])
        self.subject_seqlength = int( self.subject_length / self.subject_length_perc )
        self.e_value = float(tuple[11])
        self.score = float(tuple[12])
        self.identity = float(tuple[13])
        self.id = int(tuple[14])
        
    ## Reset
    #
    # @note calls Path.reset() then puts every length attribute back to -1
    #
    def reset( self ):
        Path.reset( self )
        self.query_length = -1
        self.query_length_perc = -1
        self.query_seqlength = -1
        self.match_length_perc = -1
        self.subject_length = -1
        self.subject_length_perc = -1
        self.subject_seqlength = -1
        
    ## Return a formated string of the attribute data
    # 
    # @note fields are tab-separated, in the same order as the tuple read by setFromTuple();
    #  the score is written with '%i', hence without its decimal part
    #
    def toString( self ):
        fields = [ "%s" % ( self.range_query.toString() ),
                   "%i\t%f" % ( self.query_length, self.query_length_perc ),
                   "%f" % ( self.match_length_perc ),
                   "%s" % ( self.range_subject.toString() ),
                   "%i\t%f" % ( self.subject_length, self.subject_length_perc ),
                   "%g\t%i\t%f" % ( self.e_value, self.score, self.identity ),
                   "%i" % ( self.id ) ]
        return "\t".join( fields )
    
    ## Return a Path instance
    #
    # @note the Path is filled via setFromTuple() with (id, query name, query start, query end,
    #  subject name, subject start, subject end, E-value, score, identity)
    #
    def getPathInstance( self ):
        p = Path()
        # named 'fields' so that the builtin 'tuple' is not shadowed
        fields = ( self.id,
                   self.range_query.seqname,
                   self.range_query.start,
                   self.range_query.end,
                   self.range_subject.seqname,
                   self.range_subject.start,
                   self.range_subject.end,
                   self.e_value,
                   self.score,
                   self.identity )
        p.setFromTuple( fields )
        return p
    
    ## Give information about a match whose query is included in the subject
    # 
    # @return string
    #
    def getQryIsIncluded( self ):
        string = "query %s (%d bp: %d-%d) is contained in subject %s (%d bp: %d-%d): id=%.2f - %.3f - %.3f - %.3f" %\
                 ( self.range_query.seqname, self.query_seqlength, self.range_query.start, self.range_query.end,
                   self.range_subject.seqname, self.subject_seqlength, self.range_subject.start, self.range_subject.end,
                   self.identity, self.query_length_perc, self.match_length_perc, self.subject_length_perc )
        return string
    
    ## Compare the object with another match and see if they are equal
    # (same identity, E-value and score + same subsequences whether in query or subject)
    #
    # @param match a Match instance
    # @param verbose when > 0, the reason of a difference is printed on stdout
    # @return True if objects are equals False otherwise
    #
    def isDoublonWith( self, match, verbose=0 ):

        # both matches must have the same identity, score and E-value
        if not ( self.identity == match.identity and self.score == match.score and self.e_value == match.e_value ):
            if verbose > 0:
                print("different match numbers")
                sys.stdout.flush()
            return False

        # same query and same subject: compare the ranges in the same order
        if ( self.range_query.seqname == match.range_query.seqname
             and self.range_subject.seqname == match.range_subject.seqname ):
            sameCoordinates = self.range_query.__eq__( match.range_query ) and self.range_subject.__eq__( match.range_subject )

        # query and subject are swapped between both matches: compare the ranges crosswise
        elif self.range_query.seqname == match.range_subject.seqname and self.range_subject.seqname == match.range_query.seqname:
            sameCoordinates = self.range_query.__eq__( match.range_subject ) and self.range_subject.__eq__( match.range_query )

        else:
            if verbose > 0:
                print("different sequence names")
                sys.stdout.flush()
            return False

        if sameCoordinates:
            return True
        if verbose > 0:
            print("different coordinates")
            sys.stdout.flush()
        return False
示例#5
0
## Alignment between a query and a subject
#
# An instance holds one range object for the query, one for the subject,
# and the E-value, score and identity of the alignment.
#
class Align( object ):
    
    ## Constructor
    #
    # @param range_q: a Range instance for the query (a new Range() is created when None)
    # @param range_s: a Range instance for the subject (a new Range() is created when None)
    # @param e_value: E-value of the match 
    # @param identity: identity percentage of the match
    # @param score: score of the match
    # @note the range defaults are None and not Range(): a default value is evaluated
    #  only once, so Range() defaults would be shared (and modified) by every instance
    #  built without explicit ranges
    #
    def __init__(self, range_q=None, range_s=None, e_value=0, identity=0, score=0):
        if range_q is None:
            range_q = Range()
        if range_s is None:
            range_s = Range()
        self.range_query = range_q
        self.range_subject = range_s
        self.e_value = float(e_value)
        self.score = int(score)
        self.identity = float(identity)
       
    ## Return True if the instance is empty, False otherwise
    #
    # @note the instance is empty as soon as one of its two ranges is empty
    #
    def isEmpty(self):
        return self.range_query.isEmpty() or self.range_subject.isEmpty()
        
    ## Equal operator
    #
    # @param o an Align instance
    # @return True if both ranges, the E-value, the score and the identity are equal
    #
    def __eq__(self, o):
        if ( self.range_query == o.range_query and self.range_subject == o.range_subject
             and self.e_value == o.e_value and self.score == o.score and self.identity == o.identity ):
            return True
        return False
    
    ## Unequal operator
    #
    # @param o an Align instance
    #
    def __ne__(self, o):
        return not self.__eq__(o)
    
    ## Convert the object into a string
    #
    # @note used in 'print myObject'
    #
    def __str__( self ):
        return self.toString()
    
    ## Read attributes from an Align file
    # 
    # @param fileHandler: file handler of the file being read
    # @return: 1 on success, 0 at the end of the file or when the line has too few fields
    # @note the instance is reset before the line is read
    #
    def read(self, fileHandler):
        self.reset()
        line = fileHandler.readline()
        if line == "":
            return 0
        tokens = line.split("\t")
        # NOTE(review): the threshold is the number of instance attributes, not the
        # number of fields indexed by setFromTuple(); a line in between these two
        # counts passes this test and then raises IndexError -- confirm intent
        if len(tokens) < len(self.__dict__):
            return 0
        self.setFromTuple(tokens)
        return 1
    
    ## Set attributes from tuple
    #
    # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStart,subjectEnd,E-value,score,identity)
    # @note data are loaded such that the query is always on the direct strand
    # @note the score is stored as a float here whereas the constructor stores an int
    #
    def setFromTuple( self, tuple ):
        # New Range instances are assigned, so the Range objects referenced so far
        # (possibly supplied by the caller of the constructor) are left untouched.
        self.range_query = Range()
        self.range_subject = Range()
        if int(tuple[1]) < int(tuple[2]):
            self.range_query.setFromTuple( ( tuple[0], tuple[1], tuple[2] ) )
            self.range_subject.setFromTuple( ( tuple[3], tuple[4], tuple[5] ) )
        else:
            # swap the coordinates of the query and of the subject together
            self.range_query.setFromTuple( ( tuple[0], tuple[2], tuple[1] ) )
            self.range_subject.setFromTuple( ( tuple[3], tuple[5], tuple[4] ) )
        self.e_value = float(tuple[6])
        self.score = float(tuple[7])
        self.identity = float(tuple[8])
        
    ## Reset
    #
    # @note both ranges are reset in place and the three statistics are put back to 0
    #
    def reset( self ):
        self.range_query.reset()
        self.range_subject.reset()
        self.e_value = 0
        self.score = 0
        self.identity = 0
        
    ## Return the attributes as a formatted string
    #
    # @note tab-separated: query range, subject range, E-value, score (written with '%i'), identity
    #
    def toString(self):
        fields = [ "%s" % ( self.range_query.toString() ),
                   "%s" % ( self.range_subject.toString() ),
                   "%g\t%i\t%f" % ( self.e_value, self.score, self.identity ) ]
        return "\t".join( fields )
    
    
    ## Return the attributes as a GFF-formatted string
    #
    # @param source: value of the 2nd column
    # @param type: value of the 3rd column
    # @param phase: value of the 8th column
    # @param ID: value of the 'ID' attribute (generated from the current time when empty)
    # @param Parent: value of the 'Parent' attribute (attribute omitted when empty)
    # @note the E-value is written in the 6th column
    # @note side effect: the instance is reversed if its subject is not on the direct strand
    #
    def toStringAsGff( self, source="REPET", type="match", phase=".", ID="", Parent="" ):
        if not self.isSubjectOnDirectStrand():
            self.reverse()
        columns = [ "%s" % ( self.getQueryName() ),
                    "%s" % ( source ),
                    "%s" % ( type ),
                    "%s" % ( self.getQueryMin() ),
                    "%s" % ( self.getQueryMax() ),
                    "%g" % ( self.e_value ),
                    "%s" % ( self.getQueryStrand() ),
                    "%s" % ( phase ) ]
        if ID != "":
            attributes = "ID=%s" % ( ID )
        else:
            # The generated identifier is a string of digits cut from the current
            # timestamp, hence '%s' ('%i' raises TypeError on a string).
            # NOTE(review): two calls close in time may produce the same identifier.
            attributes = "ID=%s" % ( str(time.time())[-8:-1].replace(".","") )
        if Parent != "":
            attributes += ";Parent=%s" % ( Parent )
        attributes += ";Target=%s %i %i" % ( self.getSubjectName(), self.getSubjectStart(), self.getSubjectEnd() )
        columns.append( "%s" % ( attributes ) )
        return "\t".join( columns )
    
    
    ## Reverse query and subject
    #
    def reverse(self):
        self.range_query.reverse()
        self.range_subject.reverse()
        
    ## Show the attributes
    #
    # @note single-argument print call: valid with the print statement as well as the print function
    #
    def show(self):
        print(self.toString())
 
    ## Write attributes into an Align file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def write(self, fileHandler):
        fileHandler.write("%s\n" % (self.toString()))
        
    ## Save attributes into an Align file
    #
    # @param file: name of the file being filled
    # @note the file is opened in append mode and closed even if the writing fails
    #
    def save(self, file):
        with open( file, "a" ) as fileHandler:
            self.write( fileHandler )
        
    ## Return the score
    #
    def getScore(self):
        return self.score

    ## Return the identity
    #
    def getIdentity(self):
        return self.identity
    
    ## Return the E-value
    #
    def getEvalue(self):
        return self.e_value
    
    ## Return the length on the query
    #
    def getLengthOnQuery(self):
        return self.range_query.getLength()
    
    ## Return the name of the query
    #
    def getQueryName( self ):
        return self.range_query.seqname
    
    ## Return the start of the query
    #
    def getQueryStart( self ):
        return self.range_query.start
    
    ## Return the end of the query
    #
    def getQueryEnd( self ):
        return self.range_query.end
    
    ## Return the min of the query
    #
    def getQueryMin( self ):
        return self.range_query.getMin()
    
    ## Return the max of the query
    #
    def getQueryMax( self ):
        return self.range_query.getMax()
    
    ## Return the strand of the query
    #
    def getQueryStrand( self ):
        return self.range_query.getStrand()
    
    ## Return the name of the subject
    #
    def getSubjectName( self ):
        return self.range_subject.seqname
    
    ## Return the start of the subject
    #
    def getSubjectStart( self ):
        return self.range_subject.start
    
    ## Return the end of the subject
    #
    def getSubjectEnd( self ):
        return self.range_subject.end
    
    ## Return the strand of the subject
    #
    def getSubjectStrand( self ):
        return self.range_subject.getStrand()
    
    ## Return the query as a Range instance
    #
    def getQueryAsRange( self ):
        return self.range_query
    
    ## Return the subject as a Range instance
    #
    def getSubjectAsRange( self ):
        return self.range_subject
    
    ## Set the name of the query
    #
    def setQueryName( self, name ):
        self.range_query.seqname = name
        
    ## Set the start of the query
    #
    def setQueryStart( self, start ):
        self.range_query.start = start
        
    ## Set the end of the query
    #
    def setQueryEnd( self, end ):
        self.range_query.end = end
    
    ## Set the name of the subject
    #
    def setSubjectName( self, name ):
        self.range_subject.seqname = name
        
    ## Set the start of the subject
    #
    def setSubjectStart( self, start ):
        self.range_subject.start = start
        
    ## Set the end of the subject
    #
    def setSubjectEnd( self, end ):
        self.range_subject.end = end
        
    ## Merge the instance with another Align instance
    #
    # @param o an Align instance
    # @note nothing is done if the query names or the subject names differ;
    #  otherwise the ranges are merged and the highest score, the lowest E-value
    #  and the highest identity are kept
    #
    def merge(self, o):
        if self.range_query.seqname != o.range_query.seqname \
               or self.range_subject.seqname != o.range_subject.seqname:
            return
        self.range_query.merge(o.range_query)
        self.range_subject.merge(o.range_subject)
        self.score = max(self.score,o.score)
        self.e_value = min(self.e_value,o.e_value)
        self.identity = max(self.identity,o.identity)
        
    ## Return a Map instance with the subject mapped on the query
    #
    # @note the Map is named after the subject and located on the query; the query
    #  coordinates are swapped when the subject is not on the direct strand
    #
    def getSubjectAsMapOfQuery(self):
        iMap = Map()
        iMap.name = self.range_subject.seqname
        iMap.seqname = self.range_query.seqname
        if self.range_subject.isOnDirectStrand():
            iMap.start = self.range_query.start
            iMap.end = self.range_query.end
        else:
            iMap.start = self.range_query.end
            iMap.end = self.range_query.start
        return iMap
    
    ## Return True if query is on direct strand
    #
    def isQueryOnDirectStrand( self ):
        return self.range_query.isOnDirectStrand()
    
    ## Return True if subject is on direct strand
    #
    def isSubjectOnDirectStrand( self ):
        return self.range_subject.isOnDirectStrand()
    
    ## Return True if query and subject are on the same strand, False otherwise
    #
    def areQrySbjOnSameStrand(self):
        return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()
    
    ## Return False if query and subject are on the same strand, True otherwise
    #
    def areQrySbjOnOppositeStrands(self):
        return not self.areQrySbjOnSameStrand()

    ## Set attributes from string
    #
    # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity
    # @param sep field separator
    # @note a single trailing newline is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        # endswith() instead of string[-1]: an empty string must not fail on this test
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple( string.split(sep) )
        
    ## Return a first Map instance for the query and a second for the subject
    #
    # @note both Map instances are named "repet"
    #
    def getMapsOfQueryAndSubject(self):
        iMapQuery = Map( name="repet",
                         seqname=self.range_query.seqname,
                         start=self.range_query.start,
                         end=self.range_query.end )
        iMapSubject = Map( name="repet",
                         seqname=self.range_subject.seqname,
                         start=self.range_subject.start,
                         end=self.range_subject.end )
        return iMapQuery, iMapSubject
    
    ## Write query coordinates as Map in a file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def writeSubjectAsMapOfQuery( self, fileHandler ):
        m = self.getSubjectAsMapOfQuery()
        m.write( fileHandler )
        
    ## Return a bin for fast database access
    #
    # @note the bin is the one returned by the query range
    #
    def getBin(self):
        return self.range_query.getBin()
    
    ## Switch query and subject
    #
    # @note after the switch, the instance is reversed if the new query is not on the direct strand
    #
    def switchQuerySubject( self ):
        self.range_query, self.range_subject = self.range_subject, self.range_query
        if not self.isQueryOnDirectStrand():
            self.reverse()
            
    ## Return True if the query overlaps with the query of another Align instance, False otherwise
    #
    def isQueryOverlapping( self, iAlign ):
        return self.getQueryAsRange().isOverlapping( iAlign.getQueryAsRange() )
    
    ## Return True if the subject overlaps with the subject of another Align instance, False otherwise
    #
    def isSubjectOverlapping( self, iAlign ):
        return self.getSubjectAsRange().isOverlapping( iAlign.getSubjectAsRange() )
    
    ## Return True if the Align instance overlaps with another Align instance, False otherwise
    #
    # @note both the queries and the subjects have to overlap
    #
    def isOverlapping( self, iAlign ):
        return bool( self.isQueryOverlapping( iAlign ) and self.isSubjectOverlapping( iAlign ) )
        
    ## Update the score
    #
    # @note the new score is the length on the query times the percentage of identity
    #
    def updateScore( self ):
        newScore = self.getLengthOnQuery() * self.getIdentity() / 100.0
        self.score = newScore
示例#6
0
 def setFromTuple(self, tuple):
     """Set the name from the first field and pass the other fields to Range.

     @param tuple: a sequence whose first item is the name; the remaining
         items are handed over, unchanged, to Range.setFromTuple()
     """
     self.name = tuple[0]
     rangeFields = tuple[1:]
     Range.setFromTuple(self, rangeFields)
示例#7
0
 def __eq__(self, o):
     """Equal operator.

     Two instances are equal when their names are equal and
     Range.__eq__() also considers them equal.

     @param o: the object compared with this instance
     @return: False if the names differ, otherwise the result of Range.__eq__()
     """
     if not self.name == o.name:
         # different names: the Range comparison is not even attempted
         return False
     return Range.__eq__(self, o)
示例#8
0
 def __init__(self, name="", seqname="", start=-1, end=-1):
     """Constructor.

     Stores the name on the instance, then delegates the remaining
     arguments to Range.__init__().

     @param name: name stored in the 'name' attribute (default: empty string)
     @param seqname: forwarded to Range.__init__() (default: empty string)
     @param start: forwarded to Range.__init__() (default: -1)
     @param end: forwarded to Range.__init__() (default: -1)
     """
     self.name = name
     # the three remaining arguments are passed positionally to the Range constructor
     Range.__init__( self, seqname, start, end )
示例#9
0
 def toString(self):
     """Return the name followed by a tab and the string built by Range.toString().

     @return: a string formatted as "<name>\\t<Range.toString() output>"
     """
     nameField = "%s" % (self.name)
     rangeField = "%s" % (Range.toString(self))
     return nameField + "\t" + rangeField