## Set the ranges, the E-value, the score and the identity from a sequence of fields
#
# @param tuple a sequence with (queryName,queryStart,queryEnd,subjectName,subjectStart,subjectEnd,E-value,score,identity)
# @note when queryStart is not lower than queryEnd, start and end are swapped
#       for both the query and the subject
#
def setFromTuple( self, tuple ):
    #TODO: we need to create Range instances because of __eq__() and isEmpty() tests, but WHY ???
    self.range_query = Range()
    self.range_subject = Range()

    # Positions (relative to the name field) of the values used as start and end.
    if int(tuple[1]) < int(tuple[2]):
        startOffset, endOffset = 1, 2
    else:
        startOffset, endOffset = 2, 1

    self.range_query.setFromTuple( ( tuple[0], tuple[startOffset], tuple[endOffset] ) )
    self.range_subject.setFromTuple( ( tuple[3], tuple[3 + startOffset], tuple[3 + endOffset] ) )

    # All three statistics are stored as floats.
    self.e_value = float(tuple[6])
    self.score = float(tuple[7])
    self.identity = float(tuple[8])
## Build a Map from the result of Range.diff() applied to this object and another one
#
# @param o an object providing getRange()
# @return a Map instance: a default-constructed Map when the Range returned by
#         Range.diff() is empty, otherwise a Map with the name and seqname of
#         this object and the start/end of the returned Range
# @note NOTE(review): whether Range.diff() also modifies this object cannot be
#       told from here - check the Range class
#
def diff(self, o):
    remainder = Range.diff(self, o.getRange())
    result = Map()
    if remainder.isEmpty():
        return result
    result.name = self.name
    result.seqname = self.seqname
    result.start = remainder.start
    result.end = remainder.end
    return result
## Set attributes from a sequence of 15 fields
#
# @param tuple a sequence with (query name, query start, query end,
#        query length, query length perc, match length perc, subject name,
#        subject start, subject end, subject length, subject length perc,
#        e_value, score, identity, id)
# @note when query start is not lower than query end, start and end are swapped
#       for both the query and the subject
# @note the sequence lengths are computed as length / length perc, hence a
#       percentage equal to zero raises ZeroDivisionError
#
def setFromTuple( self, tuple ):
    qStart, qEnd = int(tuple[1]), int(tuple[2])
    sStart, sEnd = int(tuple[7]), int(tuple[8])
    if not qStart < qEnd:
        # Swap both pairs together so that query and subject stay in sync.
        qStart, qEnd = qEnd, qStart
        sStart, sEnd = sEnd, sStart
    self.range_query = Range(tuple[0], qStart, qEnd)
    self.range_subject = Range(tuple[6], sStart, sEnd)

    # Query statistics.
    self.query_length = int(tuple[3])
    self.query_length_perc = float(tuple[4])
    self.query_seqlength = int( self.query_length / self.query_length_perc )
    self.match_length_perc = float(tuple[5])

    # Subject statistics.
    self.subject_length = int(tuple[9])
    self.subject_length_perc = float(tuple[10])
    self.subject_seqlength = int( self.subject_length / self.subject_length_perc )

    # Match statistics and identifier.
    self.e_value = float(tuple[11])
    self.score = float(tuple[12])
    self.identity = float(tuple[13])
    self.id = int(tuple[14])
## Path extended with length statistics of the match on the query and on the subject
#
# @note the value -1 is used for every length attribute that has not been set
#
class Match( Path ):

    ## Constructor
    #
    def __init__(self):
        Path.__init__(self)
        # query_length_perc: length of the match on the query / length of the query
        self.query_length = self.query_length_perc = self.query_seqlength = -1
        # match_length_perc: length of the match on the query / total length of the subject
        self.match_length_perc = -1
        # subject_length_perc: length of the match on the subject / length of the subject
        self.subject_length = self.subject_length_perc = self.subject_seqlength = -1

    ## Equal operator
    #
    # @note the attributes specific to Match are compared first, then the
    #       comparison is delegated to Path.__eq__()
    #
    def __eq__(self, o):
        ownAttributes = ( "query_length", "query_length_perc", "query_seqlength",
                          "subject_length", "subject_length_perc", "subject_seqlength",
                          "match_length_perc" )
        for attribute in ownAttributes:
            if getattr( self, attribute ) != getattr( o, attribute ):
                return False
        return Path.__eq__(self, o)

    ## Return the length of the match on the query divided by the total length of the query
    #
    def getLengthPercOnQuery(self):
        return self.query_length_perc

    ## Return the length of the match on the subject divided by the total length of the subject
    #
    def getLengthPercOnSubject(self):
        return self.subject_length_perc

    ## Return the length of the match on the subject
    #
    def getLengthMatchOnSubject(self):
        return self.subject_length

    ## Set attributes from a tuple
    #
    # @param tuple: a tuple with (query name,query start,query end,
    #  query length, query length perc (between 0-1), match length perc (between 0-1), subject name,
    #  subject start,subject end,subject length, subject length percentage (between 0-1), e_value,score,identity,id)
    # @note when query start is not lower than query end, start and end are
    #       swapped for both the query and the subject
    # @note the sequence lengths are computed as length / length perc, hence a
    #       percentage equal to zero raises ZeroDivisionError
    #
    def setFromTuple( self, tuple ):
        qStart, qEnd = int(tuple[1]), int(tuple[2])
        sStart, sEnd = int(tuple[7]), int(tuple[8])
        if not qStart < qEnd:
            # Swap both pairs together so that query and subject stay in sync.
            qStart, qEnd = qEnd, qStart
            sStart, sEnd = sEnd, sStart
        self.range_query = Range(tuple[0], qStart, qEnd)
        self.range_subject = Range(tuple[6], sStart, sEnd)

        self.query_length = int(tuple[3])
        self.query_length_perc = float(tuple[4])
        self.query_seqlength = int( self.query_length / self.query_length_perc )
        self.match_length_perc = float(tuple[5])

        self.subject_length = int(tuple[9])
        self.subject_length_perc = float(tuple[10])
        self.subject_seqlength = int( self.subject_length / self.subject_length_perc )

        self.e_value = float(tuple[11])
        self.score = float(tuple[12])
        self.identity = float(tuple[13])
        self.id = int(tuple[14])

    ## Reset
    #
    # @note resets the Path part first, then puts every length attribute back to -1
    #
    def reset( self ):
        Path.reset( self )
        self.query_length = self.query_length_perc = self.query_seqlength = -1
        self.match_length_perc = -1
        self.subject_length = self.subject_length_perc = self.subject_seqlength = -1

    ## Return a formated string of the attribute data
    #
    # @note the fields are tab-separated, in the same order as the one expected by setFromTuple()
    #
    def toString( self ):
        pieces = [
            "%s" % ( self.range_query.toString() ),
            "\t%i\t%f" % ( self.query_length, self.query_length_perc ),
            "\t%f" % ( self.match_length_perc ),
            "\t%s" % ( self.range_subject.toString() ),
            "\t%i\t%f" % ( self.subject_length, self.subject_length_perc ),
            "\t%g\t%i\t%f" % ( self.e_value, self.score, self.identity ),
            "\t%i" % ( self.id ),
        ]
        return "".join( pieces )

    ## Return a Path instance
    #
    # @note the Path is filled via setFromTuple() with (id, query name, query start,
    #       query end, subject name, subject start, subject end, e_value, score, identity)
    #
    def getPathInstance( self ):
        iPath = Path()
        qry = self.range_query
        sbj = self.range_subject
        fields = ( self.id,
                   qry.seqname, qry.start, qry.end,
                   sbj.seqname, sbj.start, sbj.end,
                   self.e_value, self.score, self.identity )
        iPath.setFromTuple( fields )
        return iPath

    ## Give information about a match whose query is included in the subject
    #
    # @return string
    #
    def getQryIsIncluded( self ):
        template = "query %s (%d bp: %d-%d) is contained in subject %s (%d bp: %d-%d): id=%.2f - %.3f - %.3f - %.3f"
        values = ( self.range_query.seqname, self.query_seqlength,
                   self.range_query.start, self.range_query.end,
                   self.range_subject.seqname, self.subject_seqlength,
                   self.range_subject.start, self.range_subject.end,
                   self.identity, self.query_length_perc,
                   self.match_length_perc, self.subject_length_perc )
        return template % values

    ## Compare the object with another match and see if they are equal
    # (same identity, E-value and score + same subsequences whether in query or subject)
    #
    # @param match a Match instance
    # @param verbose when greater than 0, the reason of a difference is printed on stdout
    # @return True if objects are equals False otherwise
    #
    def isDoublonWith( self, match, verbose=0 ):

        def report( message ):
            if verbose > 0:
                print( message )
                sys.stdout.flush()

        # both matches must have the same identity, score and E-value
        sameNumbers = self.identity == match.identity \
                      and self.score == match.score \
                      and self.e_value == match.e_value
        if not sameNumbers:
            report( "different match numbers" )
            return False

        qryName = self.range_query.seqname
        sbjName = self.range_subject.seqname
        if qryName == match.range_query.seqname and sbjName == match.range_subject.seqname:
            # query and subject are identical
            otherQuery, otherSubject = match.range_query, match.range_subject
        elif qryName == match.range_subject.seqname and sbjName == match.range_query.seqname:
            # query and subject are reversed but identical
            otherQuery, otherSubject = match.range_subject, match.range_query
        else:
            report( "different sequence names" )
            return False

        # the coordinates must be equal
        if self.range_query.__eq__( otherQuery ) and self.range_subject.__eq__( otherSubject ):
            return True
        report( "different coordinates" )
        return False
## Alignment between a range on a query sequence and a range on a subject sequence,
# together with its E-value, score and identity
#
class Align( object ):

    ## Constructor
    #
    # @param range_q: a Range instance for the query (a new empty Range if None)
    # @param range_s: a Range instance for the subject (a new empty Range if None)
    # @param e_value: E-value of the match
    # @param identity: identity percentage of the match
    # @param score: score of the match
    #
    def __init__(self, range_q=None, range_s=None, e_value=0, identity=0, score=0):
        # The default ranges are built here, once per instance. Using Range()
        # directly as default value would create a single object shared by
        # every Align built without ranges, and reset() (which works in place)
        # on one of them would then silently modify all the others.
        if range_q is None:
            range_q = Range()
        if range_s is None:
            range_s = Range()
        self.range_query = range_q
        self.range_subject = range_s
        self.e_value = float(e_value)
        self.score = int(score)
        self.identity = float(identity)

    ## Return True if the instance is empty, False otherwise
    #
    # @note the instance is empty as soon as one of its two ranges is empty
    #
    def isEmpty(self):
        return self.range_query.isEmpty() or self.range_subject.isEmpty()

    ## Equal operator
    #
    # @param o an Align instance
    #
    def __eq__(self, o):
        if self.range_query==o.range_query and self.range_subject==o.range_subject and \
        self.e_value==o.e_value and self.score==o.score and self.identity==o.identity:
            return True
        return False

    ## Unequal operator
    #
    # @param o an Align instance
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string
    #
    # @note used in 'print myObject'
    #
    def __str__( self ):
        return self.toString()

    ## Read attributes from an Align file
    #
    # @param fileHandler: file handler of the file being read
    # @return: 1 on success, 0 at the end of the file or when the line has too few fields
    # @note the instance is reset before reading
    #
    def read(self, fileHandler):
        self.reset()
        line = fileHandler.readline()
        if line == "":
            return 0
        tokens = line.split("\t")
        # NOTE(review): the minimal number of fields is taken from the number of
        # instance attributes, which is lower than the number of fields indexed
        # by setFromTuple(); a line in between raises IndexError - confirm the
        # intended threshold before changing it (subclasses may rely on it).
        if len(tokens) < len(self.__dict__.keys()):
            return 0
        self.setFromTuple(tokens)
        return 1

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStar,subjectEnd,E-value,score,identity)
    # @note data are loaded such that the query is always on the direct strand
    # @note the score is stored as a float here, whereas the constructor stores an int
    #
    def setFromTuple( self, tuple ):
        # New Range instances are assigned instead of updating the current ones
        # in place, so that Range objects given to the constructor or handed out
        # by getQueryAsRange()/getSubjectAsRange() are left untouched.
        self.range_query = Range()
        self.range_subject = Range()
        # Positions (relative to the name field) of the values used as start and end.
        if int(tuple[1]) < int(tuple[2]):
            startOffset, endOffset = 1, 2
        else:
            startOffset, endOffset = 2, 1
        self.range_query.setFromTuple( ( tuple[0], tuple[startOffset], tuple[endOffset] ) )
        self.range_subject.setFromTuple( ( tuple[3], tuple[3 + startOffset], tuple[3 + endOffset] ) )
        self.e_value = float(tuple[6])
        self.score = float(tuple[7])
        self.identity = float(tuple[8])

    ## Reset
    #
    # @note both ranges are reset in place
    #
    def reset( self ):
        self.range_query.reset()
        self.range_subject.reset()
        self.e_value = 0
        self.score = 0
        self.identity = 0

    ## Return the attributes as a formatted string
    #
    # @note tab-separated: query range, subject range, E-value, score (as integer), identity
    #
    def toString(self):
        string = "%s" % ( self.range_query.toString() )
        string += "\t%s" % ( self.range_subject.toString() )
        string += "\t%g\t%i\t%f" % ( self.e_value, self.score, self.identity )
        return string

    ## Return the attributes as a GFF-formatted string
    #
    # @param source value of the 2nd column
    # @param type value of the 3rd column
    # @param phase value of the 8th column
    # @param ID value of the 'ID' attribute (if empty, digits taken from the current time are used)
    # @param Parent value of the 'Parent' attribute (omitted if empty)
    # @note if the subject is not on the direct strand, the instance itself is
    #       reversed (query and subject) before being formatted
    #
    def toStringAsGff( self, source="REPET", type="match", phase=".", ID="", Parent="" ):
        if not self.isSubjectOnDirectStrand():
            self.reverse()
        string = "%s" % ( self.getQueryName() )
        string += "\t%s" % ( source )
        string += "\t%s" % ( type )
        string += "\t%s" % ( self.getQueryMin() )
        string += "\t%s" % ( self.getQueryMax() )
        string += "\t%g" % ( self.e_value )
        string += "\t%s" % ( self.getQueryStrand() )
        string += "\t%s" % ( phase )
        attributes = ""
        if ID != "":
            attributes += "ID=%s" % ( ID )
        else:
            # The generated identifier is a string of digits, so it has to be
            # formatted with %s (%i raises TypeError on a string).
            attributes += "ID=%s" % ( str(time.time())[-8:-1].replace(".","") )
        if Parent != "":
            attributes += ";Parent=%s" % ( Parent )
        attributes += ";Target=%s %i %i" % ( self.getSubjectName(), self.getSubjectStart(), self.getSubjectEnd() )
        string += "\t%s" % ( attributes )
        return string

    ## Reverse query and subject
    #
    def reverse(self):
        self.range_query.reverse()
        self.range_subject.reverse()

    ## Show the attributes
    #
    def show(self):
        # Single-argument call form: same output with the print statement
        # (Python 2) and with the print function (Python 3).
        print( self.toString() )

    ## Write attributes into an Align file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def write(self, fileHandler):
        fileHandler.write("%s\n" % (self.toString()))

    ## Save attributes into an Align file
    #
    # @param file: name of the file being filled
    # @note the line is appended at the end of the file
    #
    def save(self, file):
        # The 'with' block closes the file even if the write fails.
        with open( file, "a" ) as fileHandler:
            self.write( fileHandler )

    ## Return the score
    #
    def getScore(self):
        return self.score

    ## Return the identity
    #
    def getIdentity(self):
        return self.identity

    ## Return the E-value
    #
    def getEvalue(self):
        return self.e_value

    ## Return the length on the query
    #
    def getLengthOnQuery(self):
        return self.range_query.getLength()

    ## Return the name of the query
    #
    def getQueryName( self ):
        return self.range_query.seqname

    ## Return the start of the query
    #
    def getQueryStart( self ):
        return self.range_query.start

    ## Return the end of the query
    #
    def getQueryEnd( self ):
        return self.range_query.end

    ## Return the min of the query
    #
    def getQueryMin( self ):
        return self.range_query.getMin()

    ## Return the max of the query
    #
    def getQueryMax( self ):
        return self.range_query.getMax()

    ## Return the strand of the query
    #
    def getQueryStrand( self ):
        return self.range_query.getStrand()

    ## Return the name of the subject
    #
    def getSubjectName( self ):
        return self.range_subject.seqname

    ## Return the start of the subject
    #
    def getSubjectStart( self ):
        return self.range_subject.start

    ## Return the end of the subject
    #
    def getSubjectEnd( self ):
        return self.range_subject.end

    ## Return the strand of the subject
    #
    def getSubjectStrand( self ):
        return self.range_subject.getStrand()

    ## Return the query as a Range instance
    #
    # @note the instance itself is returned, not a copy
    #
    def getQueryAsRange( self ):
        return self.range_query

    ## Return the subject as a Range instance
    #
    # @note the instance itself is returned, not a copy
    #
    def getSubjectAsRange( self ):
        return self.range_subject

    ## Set the name of the query
    #
    def setQueryName( self, name ):
        self.range_query.seqname = name

    ## Set the start of the query
    #
    def setQueryStart( self, start ):
        self.range_query.start = start

    ## Set the end of the query
    #
    def setQueryEnd( self, end ):
        self.range_query.end = end

    ## Set the name of the subject
    #
    def setSubjectName( self, name ):
        self.range_subject.seqname = name

    ## Set the start of the subject
    #
    def setSubjectStart( self, start ):
        self.range_subject.start = start

    ## Set the end of the subject
    #
    def setSubjectEnd( self, end ):
        self.range_subject.end = end

    ## Merge the instance with another Align instance
    #
    # @param o an Align instance
    # @note nothing is done if the query names or the subject names differ;
    #       otherwise the ranges are merged and the highest score, the lowest
    #       E-value and the highest identity are kept
    #
    def merge(self, o):
        if self.range_query.seqname != o.range_query.seqname \
               or self.range_subject.seqname != o.range_subject.seqname:
            return
        self.range_query.merge(o.range_query)
        self.range_subject.merge(o.range_subject)
        self.score = max(self.score,o.score)
        self.e_value = min(self.e_value,o.e_value)
        self.identity = max(self.identity,o.identity)

    ## Return a Map instance with the subject mapped on the query
    #
    # @note the Map is named after the subject and uses the query coordinates,
    #       start and end being swapped when the subject is not on the direct strand
    #
    def getSubjectAsMapOfQuery(self):
        iMap = Map()
        iMap.name = self.range_subject.seqname
        iMap.seqname = self.range_query.seqname
        if self.range_subject.isOnDirectStrand():
            iMap.start = self.range_query.start
            iMap.end = self.range_query.end
        else:
            iMap.start = self.range_query.end
            iMap.end = self.range_query.start
        return iMap

    ## Return True if query is on direct strand
    #
    def isQueryOnDirectStrand( self ):
        return self.range_query.isOnDirectStrand()

    ## Return True if subject is on direct strand
    #
    def isSubjectOnDirectStrand( self ):
        return self.range_subject.isOnDirectStrand()

    ## Return True if query and subject are on the same strand, False otherwise
    #
    def areQrySbjOnSameStrand(self):
        return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()

    ## Return False if query and subject are on the same strand, True otherwise
    #
    def areQrySbjOnOppositeStrands(self):
        return not self.areQrySbjOnSameStrand()

    ## Set attributes from string
    #
    # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity
    # @param sep field separator
    # @note a single trailing newline, if any, is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        # endswith() is also safe on an empty string, unlike string[-1].
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple( string.split(sep) )

    ## Return a first Map instance for the query and a second for the subject
    #
    # @note both Map instances are named "repet"
    #
    def getMapsOfQueryAndSubject(self):
        iMapQuery = Map( name="repet",
                         seqname=self.range_query.seqname,
                         start=self.range_query.start,
                         end=self.range_query.end )
        iMapSubject = Map( name="repet",
                           seqname=self.range_subject.seqname,
                           start=self.range_subject.start,
                           end=self.range_subject.end )
        return iMapQuery, iMapSubject

    ## Write query coordinates as Map in a file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def writeSubjectAsMapOfQuery( self, fileHandler ):
        m = self.getSubjectAsMapOfQuery()
        m.write( fileHandler )

    ## Return a bin for fast database access
    #
    # @note the bin is the one of the query range
    #
    def getBin(self):
        return self.range_query.getBin()

    ## Switch query and subject
    #
    # @note after the switch, the instance is reversed if the new query is not on the direct strand
    #
    def switchQuerySubject( self ):
        self.range_query, self.range_subject = self.range_subject, self.range_query
        if not self.isQueryOnDirectStrand():
            self.reverse()

    ## Return True if the query overlaps with the query of another Align instance, False otherwise
    #
    def isQueryOverlapping( self, iAlign ):
        return self.getQueryAsRange().isOverlapping( iAlign.getQueryAsRange() )

    ## Return True if the subject overlaps with the subject of another Align instance, False otherwise
    #
    def isSubjectOverlapping( self, iAlign ):
        return self.getSubjectAsRange().isOverlapping( iAlign.getSubjectAsRange() )

    ## Return True if the Align instance overlaps with another Align instance, False otherwise
    #
    # @note both the queries and the subjects have to overlap
    #
    def isOverlapping( self, iAlign ):
        if self.isQueryOverlapping( iAlign ) and self.isSubjectOverlapping( iAlign ):
            return True
        else:
            return False

    ## Update the score
    #
    # @note the new score is the length on the query times the percentage of identity
    #
    def updateScore( self ):
        newScore = self.getLengthOnQuery() * self.getIdentity() / 100.0
        self.score = newScore
## Set attributes from a sequence of fields
#
# @param tuple a sequence whose first item is the name; the remaining items
#        are handed over to Range.setFromTuple()
#
def setFromTuple(self, tuple):
    name = tuple[0]
    rangeFields = tuple[1:]
    self.name = name
    Range.setFromTuple(self, rangeFields)
## Equal operator
#
# @param o the object to compare with (its 'name' attribute is read)
# @return False if the names differ, otherwise the result of Range.__eq__()
#
def __eq__(self, o):
    if not self.name == o.name:
        return False
    return Range.__eq__(self, o)
## Constructor
#
# @param name value stored in the 'name' attribute
# @param seqname passed as first argument to Range.__init__()
# @param start passed as second argument to Range.__init__()
# @param end passed as third argument to Range.__init__()
#
def __init__(self, name="", seqname="", start=-1, end=-1):
    self.name = name
    rangeArguments = ( seqname, start, end )
    Range.__init__( self, *rangeArguments )
## Return the attributes as a formatted string
#
# @return the name, a tabulation, then the string returned by Range.toString()
#
def toString(self):
    namePart = "%s" % (self.name)
    rangePart = "\t%s" % (Range.toString(self))
    return namePart + rangePart