Пример #1
0
    def _gapStatus(self):   ### Predicts ancestral gap status
        '''
        Predicts ancestral gap status.

        Works on the nodes in tree.node[SeqNum:] (the last of which is treated as the root) in three passes:
        1. Down: ancgap[r] of each node becomes the mean of the values of its two non-ancestral neighbours.
        2. Root: each root ancgap[r] is fixed to 0.0 ('X' written to the sequence) if <= 0.5, else 1.0 ('-').
        3. Up: every other node, in reverse order, averages both descendants and its ancestor, then is
           fixed to 0.0/'X' or 1.0/'-' with the same 0.5 cut-off.
        Progress is written via self.log.printLog(). Any exception is logged with self.log.errorLog()
        and re-raised.
        '''
        try:
            ### Setup ###
            tree = self.obj['Tree']
            gaspnode = self.gaspnode
            internal = tree.node[tree.stat['SeqNum']:]
            root = gaspnode[tree.node[-1]]
            uptree = tree.node[tree.stat['SeqNum']:-1]
            uptree.reverse()
            # p grows by 100.0 per completed step, so p / (total number of steps) is a percentage.
            # One step per node going down, one per root residue, one per non-root node going up.
            steps = len(internal) + len(root.sequence) + len(uptree)
            progress = float(max(steps, 1))     # Never divide by zero
            p = 0

            ###  From tips to root ###
            self.log.printLog('\r#GASP','Calculating Gap Status (Down) ... 0.0%',log=False,newline=False)
            for node in internal:
                an = gaspnode[node]
                anc = node.ancNode()
                desc = node.neighbours(ignore=[anc])
                n = [gaspnode[desc[0]],gaspnode[desc[1]]]
                for r in range(len(an.sequence)):
                    # Float divisor: integer gap flags must not be floor-divided under Python 2
                    an.ancgap[r] = (n[0].ancgap[r] + n[1].ancgap[r]) / 2.0
                an.nodegap = 1
                p += 100.0
                self.log.printLog('\r#GASP','Calculating Gap Status (Down) ... %.1f%%' % (p/progress),log=False,newline=False)

            ### Fix Root ###
            an = root
            for r in range(len(an.sequence)):
                if an.ancgap[r] <= 0.5:
                    an.sequence = rje.strSub(an.sequence,r,r,'X')
                    an.ancgap[r] = 0.0
                else:
                    an.sequence = rje.strSub(an.sequence,r,r,'-')
                    an.ancgap[r] = 1.0
                p += 100.0
                self.log.printLog('\r#GASP','Calculating Gap Status (Root) ... %.1f%%' % (p/progress),log=False,newline=False)

            ### Back up tree using all 3 branches ###
            for node in uptree:
                an = gaspnode[node]
                anc = node.ancNode()
                desc = node.neighbours(ignore=[anc])
                n = [gaspnode[desc[0]],gaspnode[desc[1]],gaspnode[anc]]
                for r in range(len(an.sequence)):
                    an.ancgap[r] = (n[0].ancgap[r] + n[1].ancgap[r] + n[2].ancgap[r]) / 3.0
                    if an.ancgap[r] <= 0.5:
                        an.sequence = rje.strSub(an.sequence,r,r,'X')
                        an.ancgap[r] = 0.0
                    else:
                        an.sequence = rje.strSub(an.sequence,r,r,'-')
                        an.ancgap[r] = 1.0
                p += 100.0
                self.log.printLog('\r#GASP','Calculating Gap Status ( Up ) ... %.1f%%' % (p/progress),log=False,newline=False)

            self.log.printLog('\r#GASP','Calculation of Gap Status complete.      ')
        except:     # Catch-all is deliberate: everything is logged and then re-raised unchanged
            self.log.errorLog('Fatal Error during _gapStatus().')
            raise
Пример #2
0
 def addPhos(self,seq,pos,type): ### Adds relevant marker to given sequence
     '''
     Adds relevant marker to given sequence.
     >> seq:object = its info['Sequence'] entry is read and, if needed, replaced
     >> pos:int = index of the character to consider
     >> type:str = 'Hom' (marker '?') or 'ID'/'Self' (marker '*'); anything else raises KeyError
     The marker is written unless the existing character is a marker of equal or higher precedence.
     '''
     marker = {'Hom':'?','ID':'*','Self':'*'}[type]
     # Rank of each recognised character: lower number = higher precedence
     rank = {}
     for (i,char) in enumerate(['*','?','H','M','L','-']):
         rank[char] = i
     current = seq.info['Sequence'][pos]
     if current in rank and rank[current] <= rank[marker]:
         return      # Existing character already takes precedence
     seq.info['Sequence'] = rje.strSub(seq.info['Sequence'],pos,pos,marker)
Пример #3
0
 def addPhos(self, seq, pos,
             type):  ### Adds relevant marker to given sequence
     '''
     Adds relevant marker to given sequence.
     >> seq:object = its info['Sequence'] entry is read and, if needed, replaced
     >> pos:int = index of the character to consider
     >> type:str = 'Hom' (marker '?') or 'ID'/'Self' (marker '*'); anything else raises KeyError
     An existing marker of equal or higher precedence is left untouched.
     '''
     precedence = ('*', '?', 'H', 'M', 'L', '-')  # Strongest marker first
     new_mark = {'Hom': '?', 'ID': '*', 'Self': '*'}[type]
     old_char = seq.info['Sequence'][pos]
     if old_char in precedence:
         overwrite = precedence.index(old_char) > precedence.index(new_mark)
     else:
         overwrite = True  # Unmarked position: always take the marker
     if overwrite:
         seq.info['Sequence'] = rje.strSub(seq.info['Sequence'], pos, pos,
                                           new_mark)
Пример #4
0
    def _gaspProbs(self,
                   aalist,
                   useanc=False,
                   dir='down',
                   aaprobs=True,
                   aasub=False,
                   aafix=False,
                   gpass=0):  ### Work through tree calculating AA probs
        '''
        Work through tree calculating AA probs.
        >> aalist:list = alphabet
        >> useanc:boolean [False] = whether to consider predicted ancestor
        >> dir:str ['down'] = direction to move through tree ('up','down','root')
        >> aaprobs:boolean [True] = whether to calculate AA probabilities
        >> aasub:boolean [False] = whether to change sequence to most likely AA
        >> aafix:boolean [False] = whether to fix most likely AA as probability 1.0
        >> gpass:int [0] = Extra pass (for log output only)

        NOTE: only the literal strings 'up' and 'root' alter the node order. 'up' walks
        tree.node[SeqNum:] in reverse, skipping the last node (the root); 'root' visits
        that last node only. Any other value (including 'rootonly', which older
        documentation listed) walks tree.node[SeqNum:] in stored order.
        Any exception is logged and re-raised.
        '''
        try:
            ### Setup ###
            tree = self.obj['Tree']
            seqlen = tree.node[0].obj['Sequence'].seqLen()
            forloop = tree.node[tree.stat['SeqNum']:]
            if dir == 'up':
                forloop.reverse()
                forloop = forloop[1:]
            elif dir == 'root':
                forloop = [forloop[-1]]
            logtxt = '(%s)' % dir
            if aasub:
                logtxt = 'substitutions (%s)' % dir
            elif aaprobs:
                logtxt = 'probabilities (%s)' % dir
            if gpass > 0:
                logtxt = '[Extra Pass %d] %s' % (gpass, logtxt)
            rstseq = [''] * seqlen  # RST sequences for each column of alignment
            for node in tree.node[:tree.stat['SeqNum']]:
                for r in range(seqlen):
                    rstseq[r] += node.obj['Sequence'].info['Sequence'][r]

            ### <b> ### Each gaspnode in turn
            nodex = len(forloop)
            # enumerate() supplies the position directly; list.index() rescanned the list per node
            for (i, node) in enumerate(forloop):
                self.log.printLog('\r#GASP',
                                  'Calculating GASP %s: %.1f%%' %
                                  (logtxt, 100.0 * float(i) / nodex),
                                  log=False,
                                  newline=False)
                an = self.gaspnode[node]
                if node == tree.node[-1]:  # Root
                    desc_seq = tree.node
                else:
                    clades = tree.branchClades(node.ancBranch())
                    desc_seq = clades[1]
                ## <i> ## AA probabilities
                if aaprobs:
                    # Establish relevant nodes and PAM distances
                    anc = node.ancNode()
                    desc = node.neighbours(ignore=[anc])
                    n = [self.gaspnode[desc[0]],
                         self.gaspnode[desc[1]]]  # Gasp nodes
                    treenodes = list(desc)  # Copy, so adding the ancestor leaves desc untouched
                    p = {}
                    if anc is not None:
                        treenodes.append(anc)  # Tree Nodes
                        n.append(self.gaspnode[anc])
                    for tn in treenodes:
                        p[tn] = self.stat['FixPam']  # pam distances
                        if self.stat['FixPam'] == 0:
                            p[tn] = int(node.link(tn).stat['Length'] * 100) + 1
                    withanc = useanc and anc is not None  # Whether the ancestor contributes a third term
                    # Calculate Probabilities for each residue
                    for r in range(seqlen):
                        if (an.ancgap[r] == 0) and (an.ancfix[r] == False):  # No gap & not fixed
                            if n[0].ancfix[r] and n[1].ancfix[r] and (
                                    n[0].sequence[r] == n[1].sequence[r]
                            ):  # Fixed in descendants
                                a = n[0].sequence[r]
                                an.sequence = rje.strSub(an.sequence, r, r, a)
                                an.ancaap[r][a] = 1.0
                                an.ancfix[r] = True
                            else:  # Variation
                                for a in aalist:
                                    aap = [0.0] * 3
                                    for d in aalist:
                                        # Lookups stay inside the try so that any failure is still
                                        # reported through errorLog before being re-raised.
                                        try:
                                            aap[0] += tree.obj['PAM'].matrix[
                                                p[desc[0]]].pamp[a + d] * n[0].ancaap[r][d]
                                            aap[1] += tree.obj['PAM'].matrix[
                                                p[desc[1]]].pamp[a + d] * n[1].ancaap[r][d]
                                            if withanc:
                                                # Ancestral term uses the key d+a, not a+d
                                                aap[2] += tree.obj['PAM'].matrix[
                                                    p[anc]].pamp[d + a] * n[2].ancaap[r][d]
                                        except:
                                            self.errorLog(
                                                'Big Problem with GASP Prob calculation',
                                                quitchoice=True)
                                            raise
                                    if withanc:
                                        an.ancaap[r][a] = (aap[0] + aap[1] +
                                                           aap[2]) / 3
                                    else:
                                        an.ancaap[r][a] = (aap[0] + aap[1]) / 2
                    # Adjust Probabilities to total 1, using RareCut and DescOnly
                    if dir != 'up':
                        desc_seq = tree.node
                    an.adjustAncAAP(self.stat['RareCut'],
                                    desconly=self.opt['DescOnly'],
                                    desc=desc_seq)
                    if self.opt['RST']:
                        an.makeRST(rstseq)
                ## <ii> ## Substitute most likely AA
                if aasub:
                    for r in range(seqlen):
                        if (an.sequence[r] != '-') and (an.ancfix[r] == False):
                            maxp = 0.0
                            for a in aalist:
                                if an.ancaap[r][a] > maxp:
                                    an.sequence = rje.strSub(
                                        an.sequence, r, r, a)
                                    maxp = an.ancaap[r][a]
                                elif (an.ancaap[r][a] == maxp) and (
                                        self.aafreq[a] >
                                        self.aafreq[an.sequence[r]]):
                                    # Tie on probability: higher self.aafreq value wins
                                    an.sequence = rje.strSub(
                                        an.sequence, r, r, a)
                            if maxp == 0:  # No AA => X
                                an.sequence = rje.strSub(
                                    an.sequence, r, r, 'X')
                            if aafix:
                                for a in aalist:
                                    if a == an.sequence[r]:
                                        an.ancaap[r][a] = 1.0
                                    else:
                                        an.ancaap[r][a] = 0.0

            ### Finish ###
            self.log.printLog('\r#GASP',
                              'Calculated GASP %s: 100.0%%.' % logtxt)
        except:  # Catch-all is deliberate: everything is logged and then re-raised unchanged
            self.log.errorLog('Fatal Error during _gaspProbs().')
            raise
Пример #5
0
    def _gapStatus(self):  ### Predicts ancestral gap status
        """
        Predicts ancestral gap status.

        Works on the nodes in tree.node[SeqNum:] (the last of which is treated as the root) in three passes:
        1. Down: ancgap[r] of each node becomes the mean of the values of its two non-ancestral neighbours.
        2. Root: each root ancgap[r] is fixed to 0.0 ('X' written to the sequence) if <= 0.5, else 1.0 ('-').
        3. Up: every other node, in reverse order, averages both descendants and its ancestor, then is
           fixed to 0.0/'X' or 1.0/'-' with the same 0.5 cut-off.
        Progress is written via self.log.printLog(). Any exception is logged with self.log.errorLog()
        and re-raised.
        """
        try:
            ### Setup ###
            tree = self.obj["Tree"]
            gaspnode = self.gaspnode
            internal = tree.node[tree.stat["SeqNum"] :]
            root = gaspnode[tree.node[-1]]
            uptree = tree.node[tree.stat["SeqNum"] : -1]
            uptree.reverse()
            # p grows by 100.0 per completed step, so p / (total number of steps) is a percentage.
            # One step per node going down, one per root residue, one per non-root node going up.
            steps = len(internal) + len(root.sequence) + len(uptree)
            progress = float(max(steps, 1))  # Never divide by zero
            p = 0

            ###  From tips to root ###
            self.log.printLog("\r#GASP", "Calculating Gap Status (Down) ... 0.0%", log=False, newline=False)
            for node in internal:
                an = gaspnode[node]
                anc = node.ancNode()
                desc = node.neighbours(ignore=[anc])
                n = [gaspnode[desc[0]], gaspnode[desc[1]]]
                for r in range(len(an.sequence)):
                    # Float divisor: integer gap flags must not be floor-divided under Python 2
                    an.ancgap[r] = (n[0].ancgap[r] + n[1].ancgap[r]) / 2.0
                an.nodegap = 1
                p += 100.0
                self.log.printLog(
                    "\r#GASP", "Calculating Gap Status (Down) ... %.1f%%" % (p / progress), log=False, newline=False
                )

            ### Fix Root ###
            an = root
            for r in range(len(an.sequence)):
                if an.ancgap[r] <= 0.5:
                    an.sequence = rje.strSub(an.sequence, r, r, "X")
                    an.ancgap[r] = 0.0
                else:
                    an.sequence = rje.strSub(an.sequence, r, r, "-")
                    an.ancgap[r] = 1.0
                p += 100.0
                self.log.printLog(
                    "\r#GASP", "Calculating Gap Status (Root) ... %.1f%%" % (p / progress), log=False, newline=False
                )

            ### Back up tree using all 3 branches ###
            for node in uptree:
                an = gaspnode[node]
                anc = node.ancNode()
                desc = node.neighbours(ignore=[anc])
                n = [gaspnode[desc[0]], gaspnode[desc[1]], gaspnode[anc]]
                for r in range(len(an.sequence)):
                    an.ancgap[r] = (n[0].ancgap[r] + n[1].ancgap[r] + n[2].ancgap[r]) / 3.0
                    if an.ancgap[r] <= 0.5:
                        an.sequence = rje.strSub(an.sequence, r, r, "X")
                        an.ancgap[r] = 0.0
                    else:
                        an.sequence = rje.strSub(an.sequence, r, r, "-")
                        an.ancgap[r] = 1.0
                p += 100.0
                self.log.printLog(
                    "\r#GASP", "Calculating Gap Status ( Up ) ... %.1f%%" % (p / progress), log=False, newline=False
                )

            self.log.printLog("\r#GASP", "Calculation of Gap Status complete.      ")
        except:  # Catch-all is deliberate: everything is logged and then re-raised unchanged
            self.log.errorLog("Fatal Error during _gapStatus().")
            raise
Пример #6
0
    def _gaspProbs(
        self, aalist, useanc=False, dir="down", aaprobs=True, aasub=False, aafix=False, gpass=0
    ):  ### Work through tree calculating AA probs
        """
        Work through tree calculating AA probs.
        >> aalist:list = alphabet
        >> useanc:boolean [False] = whether to consider predicted ancestor
        >> dir:str ['down'] = direction to move through tree ('up','down','root')
        >> aaprobs:boolean [True] = whether to calculate AA probabilities
        >> aasub:boolean [False] = whether to change sequence to most likely AA
        >> aafix:boolean [False] = whether to fix most likely AA as probability 1.0
        >> gpass:int [0] = Extra pass (for log output only)

        NOTE: only the literal strings 'up' and 'root' alter the node order. 'up' walks
        tree.node[SeqNum:] in reverse, skipping the last node (the root); 'root' visits
        that last node only. Any other value (including 'rootonly', which older
        documentation listed) walks tree.node[SeqNum:] in stored order.
        Any exception is logged and re-raised.
        """
        try:
            ### Setup ###
            tree = self.obj["Tree"]
            seqlen = tree.node[0].obj["Sequence"].seqLen()
            forloop = tree.node[tree.stat["SeqNum"] :]
            if dir == "up":
                forloop.reverse()
                forloop = forloop[1:]
            elif dir == "root":
                forloop = [forloop[-1]]
            logtxt = "(%s)" % dir
            if aasub:
                logtxt = "substitutions (%s)" % dir
            elif aaprobs:
                logtxt = "probabilities (%s)" % dir
            if gpass > 0:
                logtxt = "[Extra Pass %d] %s" % (gpass, logtxt)
            rstseq = [""] * seqlen  # RST sequences for each column of alignment
            for node in tree.node[: tree.stat["SeqNum"]]:
                for r in range(seqlen):
                    rstseq[r] += node.obj["Sequence"].info["Sequence"][r]

            ### <b> ### Each gaspnode in turn
            nodex = len(forloop)
            # enumerate() supplies the position directly; list.index() rescanned the list per node
            for i, node in enumerate(forloop):
                self.log.printLog(
                    "\r#GASP",
                    "Calculating GASP %s: %.1f%%" % (logtxt, 100.0 * float(i) / nodex),
                    log=False,
                    newline=False,
                )
                an = self.gaspnode[node]
                if node == tree.node[-1]:  # Root
                    desc_seq = tree.node
                else:
                    clades = tree.branchClades(node.ancBranch())
                    desc_seq = clades[1]
                ## <i> ## AA probabilities
                if aaprobs:
                    # Establish relevant nodes and PAM distances
                    anc = node.ancNode()
                    desc = node.neighbours(ignore=[anc])
                    n = [self.gaspnode[desc[0]], self.gaspnode[desc[1]]]  # Gasp nodes
                    treenodes = list(desc)  # Copy, so adding the ancestor leaves desc untouched
                    p = {}
                    if anc is not None:
                        treenodes.append(anc)  # Tree Nodes
                        n.append(self.gaspnode[anc])
                    for tn in treenodes:
                        p[tn] = self.stat["FixPam"]  # pam distances
                        if self.stat["FixPam"] == 0:
                            p[tn] = int(node.link(tn).stat["Length"] * 100) + 1
                    withanc = useanc and anc is not None  # Whether the ancestor contributes a third term
                    # Calculate Probabilities for each residue
                    for r in range(seqlen):
                        if (an.ancgap[r] == 0) and (an.ancfix[r] == False):  # No gap & not fixed
                            if (
                                n[0].ancfix[r] and n[1].ancfix[r] and (n[0].sequence[r] == n[1].sequence[r])
                            ):  # Fixed in descendants
                                a = n[0].sequence[r]
                                an.sequence = rje.strSub(an.sequence, r, r, a)
                                an.ancaap[r][a] = 1.0
                                an.ancfix[r] = True
                            else:  # Variation
                                for a in aalist:
                                    aap = [0.0] * 3
                                    for d in aalist:
                                        # Lookups stay inside the try so that any failure is still
                                        # reported through errorLog before being re-raised.
                                        try:
                                            aap[0] += tree.obj["PAM"].matrix[p[desc[0]]].pamp[a + d] * n[0].ancaap[r][d]
                                            aap[1] += tree.obj["PAM"].matrix[p[desc[1]]].pamp[a + d] * n[1].ancaap[r][d]
                                            if withanc:
                                                # Ancestral term uses the key d+a, not a+d
                                                aap[2] += tree.obj["PAM"].matrix[p[anc]].pamp[d + a] * n[2].ancaap[r][d]
                                        except:
                                            self.errorLog("Big Problem with GASP Prob calculation", quitchoice=True)
                                            raise
                                    if withanc:
                                        an.ancaap[r][a] = (aap[0] + aap[1] + aap[2]) / 3
                                    else:
                                        an.ancaap[r][a] = (aap[0] + aap[1]) / 2
                    # Adjust Probabilities to total 1, using RareCut and DescOnly
                    if dir != "up":
                        desc_seq = tree.node
                    an.adjustAncAAP(self.stat["RareCut"], desconly=self.opt["DescOnly"], desc=desc_seq)
                    if self.opt["RST"]:
                        an.makeRST(rstseq)
                ## <ii> ## Substitute most likely AA
                if aasub:
                    for r in range(seqlen):
                        if (an.sequence[r] != "-") and (an.ancfix[r] == False):
                            maxp = 0.0
                            for a in aalist:
                                if an.ancaap[r][a] > maxp:
                                    an.sequence = rje.strSub(an.sequence, r, r, a)
                                    maxp = an.ancaap[r][a]
                                elif (an.ancaap[r][a] == maxp) and (self.aafreq[a] > self.aafreq[an.sequence[r]]):
                                    # Tie on probability: higher self.aafreq value wins
                                    an.sequence = rje.strSub(an.sequence, r, r, a)
                            if maxp == 0:  # No AA => X
                                an.sequence = rje.strSub(an.sequence, r, r, "X")
                            if aafix:
                                for a in aalist:
                                    if a == an.sequence[r]:
                                        an.ancaap[r][a] = 1.0
                                    else:
                                        an.ancaap[r][a] = 0.0

            ### Finish ###
            self.log.printLog("\r#GASP", "Calculated GASP %s: 100.0%%." % logtxt)
        except:  # Catch-all is deliberate: everything is logged and then re-raised unchanged
            self.log.errorLog("Fatal Error during _gaspProbs().")
            raise