def _gapStatus(self):   ### Predicts ancestral gap status
    '''
    Predicts ancestral gap status.

    Internal nodes are tree.node[SeqNum:]; the last entry of tree.node is treated as the root.
    1. Down: for each internal node in list order, ancgap[r] of its GASP node is set to the mean of
       the values held by its two non-ancestor neighbours, and nodegap is set to 1.
    2. Root: each position with ancgap <= 0.5 becomes 0.0 with 'X' in the sequence, otherwise 1.0 with '-'.
    3. Up: the remaining internal nodes, in reverse list order, take the mean of the two neighbours and
       the ancestor, and each position is then fixed to 0.0/'X' or 1.0/'-' in the same way.
    Progress is written with self.log.printLog(). Any exception is logged through
    self.log.errorLog() and re-raised unchanged.
    '''
    try:
        ### Setup ###
        tree = self.obj['Tree']
        gaspnode = self.gaspnode
        internal = tree.node[tree.stat['SeqNum']:]
        root = gaspnode[tree.node[-1]]
        uptree = tree.node[tree.stat['SeqNum']:-1]
        uptree.reverse()
        # One progress step per node going down, per root position and per node going up.
        # Dividing by this count keeps the reported percentage within 0-100.
        steps = len(internal) + len(root.sequence) + len(uptree)
        done = 0

        ### From tips to root ###
        self.log.printLog('\r#GASP','Calculating Gap Status (Down) ... 0.0%',log=False,newline=False)
        for node in internal:
            an = gaspnode[node]
            desc = node.neighbours(ignore=[node.ancNode()])
            n = [gaspnode[desc[0]],gaspnode[desc[1]]]
            for r in range(len(an.sequence)):
                # Float divisor: stays a true mean even where '/' floors integers (Python 2)
                an.ancgap[r] = (n[0].ancgap[r] + n[1].ancgap[r]) / 2.0
            an.nodegap = 1
            done += 1
            self.log.printLog('\r#GASP','Calculating Gap Status (Down) ... %.1f%%' % (100.0 * done / steps),log=False,newline=False)

        ### Fix Root ###
        for r in range(len(root.sequence)):
            if root.ancgap[r] <= 0.5:
                root.sequence = rje.strSub(root.sequence,r,r,'X')
                root.ancgap[r] = 0.0
            else:
                root.sequence = rje.strSub(root.sequence,r,r,'-')
                root.ancgap[r] = 1.0
            done += 1
            self.log.printLog('\r#GASP','Calculating Gap Status (Root) ... %.1f%%' % (100.0 * done / steps),log=False,newline=False)

        ### Back up tree using all 3 branches ###
        for node in uptree:
            an = gaspnode[node]
            anc = node.ancNode()
            desc = node.neighbours(ignore=[anc])
            n = [gaspnode[desc[0]],gaspnode[desc[1]],gaspnode[anc]]
            for r in range(len(an.sequence)):
                an.ancgap[r] = (n[0].ancgap[r] + n[1].ancgap[r] + n[2].ancgap[r]) / 3.0
                if an.ancgap[r] <= 0.5:
                    an.sequence = rje.strSub(an.sequence,r,r,'X')
                    an.ancgap[r] = 0.0
                else:
                    an.sequence = rje.strSub(an.sequence,r,r,'-')
                    an.ancgap[r] = 1.0
            done += 1
            self.log.printLog('\r#GASP','Calculating Gap Status ( Up ) ... %.1f%%' % (100.0 * done / steps),log=False,newline=False)
        self.log.printLog('\r#GASP','Calculation of Gap Status complete. ')
    except:
        # Catch-all kept so that every failure is logged before being re-raised unchanged
        self.log.errorLog('Fatal Error during _gapStatus().')
        raise
def addPhos(self,seq,pos,type):     ### Adds relevant marker to given sequence
    '''
    Adds relevant marker to given sequence.
    >> seq:object = sequence object; its info['Sequence'] string is read and may be replaced
    >> pos:int = index into the sequence string
    >> type:str = 'Hom' (marker '?'), 'ID' (marker '*') or 'Self' (marker '*'); other values raise KeyError
    '''
    ranking = ['*','?','H','M','L','-']     # Earlier symbols take precedence over later ones
    marker = {'Hom':'?','ID':'*','Self':'*'}[type]
    current = seq.info['Sequence'][pos]
    if current in ranking and ranking.index(current) <= ranking.index(marker):
        return      # Existing symbol ranks at least as high as the new marker: leave it alone
    seq.info['Sequence'] = rje.strSub(seq.info['Sequence'],pos,pos,marker)
def addPhos(self, seq, pos, type):  ### Adds relevant marker to given sequence
    '''
    Adds relevant marker to given sequence.
    >> seq:object = sequence object; its info['Sequence'] string is read and may be replaced
    >> pos:int = index into the sequence string
    >> type:str = 'Hom' (marker '?'), 'ID' (marker '*') or 'Self' (marker '*'); other values raise KeyError
    '''
    ranking = ['*', '?', 'H', 'M', 'L', '-']  # Earlier symbols take precedence over later ones
    marker = {'Hom': '?', 'ID': '*', 'Self': '*'}[type]
    current = seq.info['Sequence'][pos]
    outranked = current not in ranking or ranking.index(current) > ranking.index(marker)
    if outranked:
        # Unranked symbols and lower-ranked markers are overwritten by the new marker
        seq.info['Sequence'] = rje.strSub(seq.info['Sequence'], pos, pos, marker)
def _gaspProbs(self, aalist, useanc=False, dir='down', aaprobs=True, aasub=False, aafix=False, gpass=0):  ### Work through tree calculating AA probs
    '''
    Work through tree calculating AA probs.
    >> aalist:list = alphabet
    >> useanc:boolean [False] = whether to consider predicted ancestor
    >> dir:str ['down'] = direction to move through tree ('up','down','root'):
        'up' visits the internal nodes in reverse list order, skipping the last node of tree.node;
        'root' visits only the last node; any other value visits every internal node in list order
    >> aaprobs:boolean [True] = whether to calculate AA probabilities
    >> aasub:boolean [False] = whether to change sequence to most likely AA
    >> aafix:boolean [False] = whether to fix most likely AA as probability 1.0
    >> gpass:int [0] = Extra pass (for log output only)
    Any exception is logged through self.log.errorLog() and re-raised.
    '''
    try:
        ### Setup ###
        tree = self.obj['Tree']
        seqnum = tree.stat['SeqNum']
        seqlen = tree.node[0].obj['Sequence'].seqLen()
        forloop = tree.node[seqnum:]
        if dir == 'up':
            forloop.reverse()
            forloop = forloop[1:]
        elif dir == 'root':
            forloop = [forloop[-1]]
        logtxt = '(%s)' % dir
        if aasub:
            logtxt = 'substitutions (%s)' % dir
        elif aaprobs:
            logtxt = 'probabilities (%s)' % dir
        if gpass > 0:
            logtxt = '[Extra Pass %d] %s' % (gpass, logtxt)
        # RST sequences for each column of alignment: one string per column, one character per
        # node in tree.node[:SeqNum]
        tipseqs = [node.obj['Sequence'].info['Sequence'] for node in tree.node[:seqnum]]
        rstseq = [''.join([tipseq[r] for tipseq in tipseqs]) for r in range(seqlen)]

        ### <b> ### Each gaspnode in turn
        for i, node in enumerate(forloop):
            self.log.printLog('\r#GASP', 'Calculating GASP %s: %.1f%%' % (logtxt, 100.0 * float(i) / len(forloop)), log=False, newline=False)
            an = self.gaspnode[node]
            if node == tree.node[-1]:  # Root
                desc_seq = tree.node
            else:
                clades = tree.branchClades(node.ancBranch())
                desc_seq = clades[1]

            ## <i> ## AA probabilities
            if aaprobs:
                # Establish relevant nodes and PAM distances
                anc = node.ancNode()
                desc = node.neighbours(ignore=[anc])
                n = [self.gaspnode[desc[0]], self.gaspnode[desc[1]]]  # Gasp nodes
                treenodes = list(desc)  # Tree Nodes (copied so the list from neighbours() is not extended)
                if anc is not None:
                    treenodes.append(anc)
                    n.append(self.gaspnode[anc])
                with_anc = useanc and anc is not None
                p = {}  # pam distances
                for tn in treenodes:
                    p[tn] = self.stat['FixPam']
                    if self.stat['FixPam'] == 0:
                        # No fixed PAM: derive the distance from the length of the linking branch
                        p[tn] = int(node.link(tn).stat['Length'] * 100) + 1

                # Calculate Probabilities for each residue
                for r in range(seqlen):
                    # Equality test against False kept exactly as before
                    if not ((an.ancgap[r] == 0) and (an.ancfix[r] == False)):
                        continue  # Gap or already fixed
                    if n[0].ancfix[r] and n[1].ancfix[r] and (n[0].sequence[r] == n[1].sequence[r]):  # Fixed in descendants
                        a = n[0].sequence[r]
                        an.sequence = rje.strSub(an.sequence, r, r, a)
                        an.ancaap[r][a] = 1.0
                        an.ancfix[r] = True
                        continue
                    # Variation
                    for a in aalist:
                        aap = [0.0] * 3
                        for d in aalist:
                            try:
                                aap[0] += tree.obj['PAM'].matrix[p[desc[0]]].pamp[a + d] * n[0].ancaap[r][d]
                                aap[1] += tree.obj['PAM'].matrix[p[desc[1]]].pamp[a + d] * n[1].ancaap[r][d]
                                if with_anc:
                                    # The ancestor term looks up the reversed pair (d + a)
                                    aap[2] += tree.obj['PAM'].matrix[p[anc]].pamp[d + a] * n[2].ancaap[r][d]
                            except:
                                self.errorLog('Big Problem with GASP Prob calculation', quitchoice=True)
                                raise
                        if with_anc:
                            an.ancaap[r][a] = (aap[0] + aap[1] + aap[2]) / 3
                        else:
                            an.ancaap[r][a] = (aap[0] + aap[1]) / 2

                # Adjust Probabilities to total 1, using RareCut and DescOnly
                if dir != 'up':
                    desc_seq = tree.node
                an.adjustAncAAP(self.stat['RareCut'], desconly=self.opt['DescOnly'], desc=desc_seq)
                if self.opt['RST']:
                    an.makeRST(rstseq)

            if aasub:
                for r in range(seqlen):
                    if (an.sequence[r] != '-') and (an.ancfix[r] == False):
                        maxp = 0.0
                        for a in aalist:
                            if an.ancaap[r][a] > maxp:
                                an.sequence = rje.strSub(an.sequence, r, r, a)
                                maxp = an.ancaap[r][a]
                            elif (an.ancaap[r][a] == maxp) and (self.aafreq[a] > self.aafreq[an.sequence[r]]):
                                # Tie on probability: the residue with the higher self.aafreq value wins
                                an.sequence = rje.strSub(an.sequence, r, r, a)
                        if maxp == 0:  # No AA => X
                            an.sequence = rje.strSub(an.sequence, r, r, 'X')
                        if aafix:
                            for a in aalist:
                                if a == an.sequence[r]:
                                    an.ancaap[r][a] = 1.0
                                else:
                                    an.ancaap[r][a] = 0.0

        ### Finish ###
        self.log.printLog('\r#GASP', 'Calculated GASP %s: 100.0%%.' % logtxt)
    except:
        # Catch-all kept so that every failure is logged before being re-raised unchanged
        self.log.errorLog('Fatal Error during _gaspProbs().')
        raise
def _gapStatus(self):  ### Predicts ancestral gap status
    """
    Predicts ancestral gap status.

    Internal nodes are tree.node[SeqNum:]; the last entry of tree.node is treated as the root.
    1. Down: for each internal node in list order, ancgap[r] of its GASP node is set to the mean of
       the values held by its two non-ancestor neighbours, and nodegap is set to 1.
    2. Root: each position with ancgap <= 0.5 becomes 0.0 with 'X' in the sequence, otherwise 1.0 with '-'.
    3. Up: the remaining internal nodes, in reverse list order, take the mean of the two neighbours and
       the ancestor, and each position is then fixed to 0.0/'X' or 1.0/'-' in the same way.
    Progress is written with self.log.printLog(). Any exception is logged through
    self.log.errorLog() and re-raised unchanged.
    """
    try:
        ### Setup ###
        tree = self.obj["Tree"]
        gaspnode = self.gaspnode
        internal = tree.node[tree.stat["SeqNum"] :]
        root = gaspnode[tree.node[-1]]
        uptree = tree.node[tree.stat["SeqNum"] : -1]
        uptree.reverse()
        # One progress step per node going down, per root position and per node going up.
        # Dividing by this count keeps the reported percentage within 0-100.
        steps = len(internal) + len(root.sequence) + len(uptree)
        done = 0

        ### From tips to root ###
        self.log.printLog("\r#GASP", "Calculating Gap Status (Down) ... 0.0%", log=False, newline=False)
        for node in internal:
            an = gaspnode[node]
            desc = node.neighbours(ignore=[node.ancNode()])
            n = [gaspnode[desc[0]], gaspnode[desc[1]]]
            for r in range(len(an.sequence)):
                # Float divisor: stays a true mean even where '/' floors integers (Python 2)
                an.ancgap[r] = (n[0].ancgap[r] + n[1].ancgap[r]) / 2.0
            an.nodegap = 1
            done += 1
            self.log.printLog(
                "\r#GASP",
                "Calculating Gap Status (Down) ... %.1f%%" % (100.0 * done / steps),
                log=False,
                newline=False,
            )

        ### Fix Root ###
        for r in range(len(root.sequence)):
            if root.ancgap[r] <= 0.5:
                root.sequence = rje.strSub(root.sequence, r, r, "X")
                root.ancgap[r] = 0.0
            else:
                root.sequence = rje.strSub(root.sequence, r, r, "-")
                root.ancgap[r] = 1.0
            done += 1
            self.log.printLog(
                "\r#GASP",
                "Calculating Gap Status (Root) ... %.1f%%" % (100.0 * done / steps),
                log=False,
                newline=False,
            )

        ### Back up tree using all 3 branches ###
        for node in uptree:
            an = gaspnode[node]
            anc = node.ancNode()
            desc = node.neighbours(ignore=[anc])
            n = [gaspnode[desc[0]], gaspnode[desc[1]], gaspnode[anc]]
            for r in range(len(an.sequence)):
                an.ancgap[r] = (n[0].ancgap[r] + n[1].ancgap[r] + n[2].ancgap[r]) / 3.0
                if an.ancgap[r] <= 0.5:
                    an.sequence = rje.strSub(an.sequence, r, r, "X")
                    an.ancgap[r] = 0.0
                else:
                    an.sequence = rje.strSub(an.sequence, r, r, "-")
                    an.ancgap[r] = 1.0
            done += 1
            self.log.printLog(
                "\r#GASP",
                "Calculating Gap Status ( Up ) ... %.1f%%" % (100.0 * done / steps),
                log=False,
                newline=False,
            )
        self.log.printLog("\r#GASP", "Calculation of Gap Status complete. ")
    except:
        # Catch-all kept so that every failure is logged before being re-raised unchanged
        self.log.errorLog("Fatal Error during _gapStatus().")
        raise
def _gaspProbs(
    self, aalist, useanc=False, dir="down", aaprobs=True, aasub=False, aafix=False, gpass=0
):  ### Work through tree calculating AA probs
    """
    Work through tree calculating AA probs.
    >> aalist:list = alphabet
    >> useanc:boolean [False] = whether to consider predicted ancestor
    >> dir:str ['down'] = direction to move through tree ('up','down','root'):
        'up' visits the internal nodes in reverse list order, skipping the last node of tree.node;
        'root' visits only the last node; any other value visits every internal node in list order
    >> aaprobs:boolean [True] = whether to calculate AA probabilities
    >> aasub:boolean [False] = whether to change sequence to most likely AA
    >> aafix:boolean [False] = whether to fix most likely AA as probability 1.0
    >> gpass:int [0] = Extra pass (for log output only)
    Any exception is logged through self.log.errorLog() and re-raised.
    """
    try:
        ### Setup ###
        tree = self.obj["Tree"]
        seqnum = tree.stat["SeqNum"]
        seqlen = tree.node[0].obj["Sequence"].seqLen()
        forloop = tree.node[seqnum:]
        if dir == "up":
            forloop.reverse()
            forloop = forloop[1:]
        elif dir == "root":
            forloop = [forloop[-1]]
        logtxt = "(%s)" % dir
        if aasub:
            logtxt = "substitutions (%s)" % dir
        elif aaprobs:
            logtxt = "probabilities (%s)" % dir
        if gpass > 0:
            logtxt = "[Extra Pass %d] %s" % (gpass, logtxt)
        # RST sequences for each column of alignment: one string per column, one character per
        # node in tree.node[:SeqNum]
        tipseqs = [node.obj["Sequence"].info["Sequence"] for node in tree.node[:seqnum]]
        rstseq = ["".join([tipseq[r] for tipseq in tipseqs]) for r in range(seqlen)]

        ### <b> ### Each gaspnode in turn
        for i, node in enumerate(forloop):
            self.log.printLog(
                "\r#GASP",
                "Calculating GASP %s: %.1f%%" % (logtxt, 100.0 * float(i) / len(forloop)),
                log=False,
                newline=False,
            )
            an = self.gaspnode[node]
            if node == tree.node[-1]:  # Root
                desc_seq = tree.node
            else:
                clades = tree.branchClades(node.ancBranch())
                desc_seq = clades[1]

            ## <i> ## AA probabilities
            if aaprobs:
                # Establish relevant nodes and PAM distances
                anc = node.ancNode()
                desc = node.neighbours(ignore=[anc])
                n = [self.gaspnode[desc[0]], self.gaspnode[desc[1]]]  # Gasp nodes
                treenodes = list(desc)  # Tree Nodes (copied so the list from neighbours() is not extended)
                if anc is not None:
                    treenodes.append(anc)
                    n.append(self.gaspnode[anc])
                with_anc = useanc and anc is not None
                p = {}  # pam distances
                for tn in treenodes:
                    p[tn] = self.stat["FixPam"]
                    if self.stat["FixPam"] == 0:
                        # No fixed PAM: derive the distance from the length of the linking branch
                        p[tn] = int(node.link(tn).stat["Length"] * 100) + 1

                # Calculate Probabilities for each residue
                for r in range(seqlen):
                    # Equality test against False kept exactly as before
                    if not ((an.ancgap[r] == 0) and (an.ancfix[r] == False)):
                        continue  # Gap or already fixed
                    if (
                        n[0].ancfix[r]
                        and n[1].ancfix[r]
                        and (n[0].sequence[r] == n[1].sequence[r])
                    ):  # Fixed in descendants
                        a = n[0].sequence[r]
                        an.sequence = rje.strSub(an.sequence, r, r, a)
                        an.ancaap[r][a] = 1.0
                        an.ancfix[r] = True
                        continue
                    # Variation
                    for a in aalist:
                        aap = [0.0] * 3
                        for d in aalist:
                            try:
                                aap[0] += tree.obj["PAM"].matrix[p[desc[0]]].pamp[a + d] * n[0].ancaap[r][d]
                                aap[1] += tree.obj["PAM"].matrix[p[desc[1]]].pamp[a + d] * n[1].ancaap[r][d]
                                if with_anc:
                                    # The ancestor term looks up the reversed pair (d + a)
                                    aap[2] += tree.obj["PAM"].matrix[p[anc]].pamp[d + a] * n[2].ancaap[r][d]
                            except:
                                self.errorLog("Big Problem with GASP Prob calculation", quitchoice=True)
                                raise
                        if with_anc:
                            an.ancaap[r][a] = (aap[0] + aap[1] + aap[2]) / 3
                        else:
                            an.ancaap[r][a] = (aap[0] + aap[1]) / 2

                # Adjust Probabilities to total 1, using RareCut and DescOnly
                if dir != "up":
                    desc_seq = tree.node
                an.adjustAncAAP(self.stat["RareCut"], desconly=self.opt["DescOnly"], desc=desc_seq)
                if self.opt["RST"]:
                    an.makeRST(rstseq)

            if aasub:
                for r in range(seqlen):
                    if (an.sequence[r] != "-") and (an.ancfix[r] == False):
                        maxp = 0.0
                        for a in aalist:
                            if an.ancaap[r][a] > maxp:
                                an.sequence = rje.strSub(an.sequence, r, r, a)
                                maxp = an.ancaap[r][a]
                            elif (an.ancaap[r][a] == maxp) and (
                                self.aafreq[a] > self.aafreq[an.sequence[r]]
                            ):
                                # Tie on probability: the residue with the higher self.aafreq value wins
                                an.sequence = rje.strSub(an.sequence, r, r, a)
                        if maxp == 0:  # No AA => X
                            an.sequence = rje.strSub(an.sequence, r, r, "X")
                        if aafix:
                            for a in aalist:
                                if a == an.sequence[r]:
                                    an.ancaap[r][a] = 1.0
                                else:
                                    an.ancaap[r][a] = 0.0

        ### Finish ###
        self.log.printLog("\r#GASP", "Calculated GASP %s: 100.0%%." % logtxt)
    except:
        # Catch-all kept so that every failure is logged before being re-raised unchanged
        self.log.errorLog("Fatal Error during _gaspProbs().")
        raise