Пример #1
0
 def _setupOutput(self): ### Sets up output files self.str['MapFas','MissFas','MapRes']
     '''
     Sets up output files self.str['MapFas','MissFas','MapRes'].
     - A StartFrom value other than ''/'none' switches Append on (and is logged); otherwise StartFrom is blanked.
     - Each output file is named "<ResFile>.<suffix>" and passed to rje.backup(). MissFas is skipped if Combine is set.
     - If ResFile is ''/'none' it is built from the SeqIn base name and the (path-stripped) MapDB base name.
     - self.list['Headers'] is rebuilt and handed to rje.delimitedFileOutput() for the MapRes file.
     '''
     ### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     delimit = rje.getDelimit(self.cmd_list)
     if self.str['StartFrom'].lower() not in ['','none']:
         self.bool['Append'] = True
         self.printLog('#CMD','StartFrom = "%s" so Append=T' % self.str['StartFrom'])
     else: self.str['StartFrom'] = ''
     ### ~ [1] General ResFile ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     suffixes = {'MapFas':'mapping.fas','MissFas':'missing.fas','MapRes':'mapping.%s' % rje.delimitExt(delimit)}
     if self.getBool('Combine'): del suffixes['MissFas']     # Combined output has no separate "missing" file
     if self.str['ResFile'].lower() in ['','none']:
         seqbase = rje.baseFile(self.str['SeqIn'])
         dbbase = rje.baseFile(self.str['MapDB'],strip_path=True)
         self.str['ResFile'] = '%s.%s' % (seqbase,dbbase)
     for (fkey,suffix) in suffixes.items():
         self.setStr({fkey: self.getStr('ResFile') + '.' + suffix})
         rje.backup(self,self.getStr(fkey))
     ### ~ [2] Headers for MapRes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     #!# Consider replacing with rje_db object? #!#
     headers = self.list['Headers'] = ['Query','Hit','Method','MapRank','BlastRank','EVal','Score']
     for qh in ['Query','Hit']:
         headers.append('%s_Species' % qh)
         if self.bool['GablamOut']:      # GABLAM statistics add Len/Sim/ID columns for both Query and Hit
             headers.extend(['%s_%s' % (qh,st) for st in ['Len','Sim','ID']])
     rje.delimitedFileOutput(self,self.str['MapRes'],self.list['Headers'],delimit)
Пример #2
0
    def convert(self,filelist=[],outfile=None):      ### Converts scansite output files in FileList to Outfile
        '''
        Converts scansite output files in FileList to Outfile.
        >> filelist:list = scansite output files to convert. self.list['FileList'] is used if empty. (Never mutated.)
        >> outfile:str = delimited output file. self.info['Name'] is used if not given.
        << True once all files have been processed; False if there are no files to convert.
        Input lines are recognised with the module-level re_scansite pattern. Only matching lines are written and
        counted. Input files that do not exist are reported and skipped. Any other error is logged together with the
        stage reached and then re-raised.
        '''
        _stage = 'Setup'    # Set before the try so that the error handler can always report a stage
        try:
            ### Setup ###
            if len(filelist) < 1:
                filelist = self.list['FileList']
            if not outfile:
                outfile = self.info['Name']
            if len(filelist) < 1:
                self.log.errorLog('No scansite files to convert! %s unchanged/not made.' % outfile,printerror=False)
                return False
            delimit = rje.getDelimit(self.cmd_list)
            ext = rje.delimitExt(delimit)
            if ext != outfile[-3:]:     # Offer to swap the last three characters for the delimiter's extension
                newfile = outfile[:-3] + ext
                if rje.yesNo('Change file name from %s to %s?' % (outfile, newfile)):
                    outfile = newfile
            self.log.printLog('#OUT','Converting %d file(s), output to %s.' % (len(filelist),outfile))

            ### Output File ###
            _stage = 'Output File'
            makenew = not self.opt['Append'] or not os.path.exists(outfile)
            if makenew: OUTFILE = open(outfile,'w')
            else: OUTFILE = open(outfile,'a')
            sx = 0
            try:    # finally clause guarantees that the output file is closed, even if conversion fails
                if makenew:   # Create with header
                    headers = ['seq_id','enzyme','enz_group','aa','pos','score','percentile','matchseq','sa']
                    rje.writeDelimit(OUTFILE,headers,delimit)

                ### Conversion ###
                _stage = 'Conversion'
                for infile in filelist:
                    if not os.path.exists(infile):
                        self.log.errorLog('Input file %s does not exist! :o(' % infile,False,False)
                        continue
                    fx = 0
                    INFILE = open(infile,'r')
                    try:
                        inline = rje.nextLine(INFILE)
                        while inline is not None:
                            scanlist = rje.matchExp(re_scansite,inline)
                            if scanlist:
                                # Only matching lines are output: a non-matching line used to re-write the
                                # previous match (or raise a NameError if no line had matched yet).
                                rje.writeDelimit(OUTFILE,scanlist,delimit)
                                sx += 1
                                fx += 1
                                rje.progressPrint(self,sx)
                            inline = rje.nextLine(INFILE)
                    finally:
                        INFILE.close()
                    self.log.printLog('#OUT','%s scansite results from %s. (%s Total.)' % (rje.integerString(fx),infile,rje.integerString(sx)))
            finally:
                OUTFILE.close()

            ### End ###
            _stage = 'End'
            self.log.printLog('#OUT','%s scansite results output to %s.' % (rje.integerString(sx),outfile))
            return True
        except:
            self.log.errorLog('Error in convert(%s)' % _stage,printerror=True,quitchoice=False)
            raise
Пример #3
0
    def ppiDisMatrix(self): ### Converts PPI Table into distance matrix
        '''
        Converts PPI Table into distance matrix.
        The table self.info['PPITab'] is read with rje.dataDict(). Every header after the first is treated as a
        protein, and each row supplies one value per protein (converted to int where possible, then used as a flag).
        For each protein pair:
        - ppi = number of rows where at least one of the two values is true
        - unique = number of those rows where only one of the two values is true
        The distance is unique, or unique/ppi if self.opt['Scaled']. A pair with no true value in any row (ppi = 0)
        is given a scaled distance of 0.0 rather than raising ZeroDivisionError and abandoning the whole matrix.
        The matrix is saved to "<Basefile>.ppi_dis.txt".
        << returns False if the table is missing or an error occurred. Nothing (None) is returned on success.
        '''
        try:
            ### Check File ###
            if not os.path.exists(self.info['PPITab']):
                self.log.errorLog('PPI Table file "%s" missing!' % self.info['PPITab'],printerror=False)
                return False

            ### Setup ###
            data = rje.dataDict(self,self.info['PPITab'],getheaders=True)
            headers = data.pop('Headers')
            ppidis = rje_dismatrix.DisMatrix(self.log,self.cmd_list)
            ppidis.opt['Symmetric'] = True
            ppidis.setInfo({'Name':'%s.ppi_dis.txt' % self.info['Basefile'],'Type':'PPI'})
            proteins = headers[1:]      # First header is the row identifier column

            def flag(value):    ### Returns value as an integer if possible, else unchanged (for truth testing)
                try: return int(value)
                except (ValueError,TypeError): return value

            ### Make DisMatrix ###
            for (px,p1) in enumerate(proteins):
                ppidis.addDis(p1,p1,0)
                for p2 in proteins[px+1:]:      # Symmetric matrix: only pairs after p1 are needed
                    ppi = 0
                    unique = 0
                    for row in data.values():
                        v1 = flag(row[p1])
                        v2 = flag(row[p2])
                        if v1 or v2:
                            ppi += 1
                            if not (v1 and v2):
                                unique += 1
                    if self.opt['Scaled']:
                        if ppi: ppidis.addDis(p1,p2,float(unique)/float(ppi))
                        else: ppidis.addDis(p1,p2,0.0)    # No interactions for either protein: nothing differs
                    else:
                        ppidis.addDis(p1,p2,unique)

            ### Output ###
            delimit = rje.getDelimit(self.cmd_list,default=',')
            ppidis.saveMatrix(proteins,ppidis.info['Name'],delimit)

        except:
            self.log.errorLog('Major problem with rje_ppi.ppiDisMatrix')
            return False
Пример #4
0
 def scap(self):     ### Full SCAP method
     '''
     Full SCAP method.
     Writes one row per sequence to "<Basefile>.<ext>" with columns seq, type, sorted and one X<n> column for each
     X-mer length from markov.stat['MinXmer'] to markov.stat['MaxXmer']. Each X<n> value comes from self.scapSeq()
     and is formatted as %.4f if above 0.001, else as %.3e. Query sequences (self.obj['SeqList'], type "qry") are
     always processed. Background sequences (self.obj['ScapBack'], type "bg") only if that is a different object.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         markov = self.obj['Markov']
         (minx,maxx) = (markov.stat['MinXmer'],markov.stat['MaxXmer'])
         headers = ['seq','type','sorted'] + ['X%d' % x for x in range(minx,maxx+1)]
         delimit = rje.getDelimit(self.cmd_list,'\t')
         scapfile = '%s.%s' % (self.info['Basefile'],rje.delimitExt(delimit))
         rje.delimitedFileOutput(self,scapfile,headers,delimit,rje_backup=True)

         def scapPass(seqlist,seqtype,progtext,donetext):    ### Scores and outputs every sequence of one SeqList
             (sx,stot) = (0.0,seqlist.seqNum())
             for seq in seqlist.seq:
                 self.progLog('\r#SCAP',progtext % (scapfile,(sx/stot))); sx += 100.0
                 datadict = {'seq':seq.shortName(),'type':seqtype,'sorted':markov.opt['Sorted']}
                 for x in range(minx,maxx+1):
                     xkey = 'X%d' % x
                     score = self.scapSeq(seq.info['Sequence'],x)
                     if score > 0.001: datadict[xkey] = '%.4f' % score
                     else: datadict[xkey] = '%.3e' % score    # Small values use scientific notation
                 rje.delimitedFileOutput(self,scapfile,headers,delimit,datadict)
             self.printLog('\r#SCAP',donetext % (scapfile,rje.integerString(stot)))

         ### ~ [2] SCAP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         ## ~ [2a] Query ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         scapPass(self.obj['SeqList'],'qry','SCAP processing Query to %s: %.2f%%',
                  'SCAP processed Query to %s for %s sequences.')
         ## ~ [2b] Background ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if self.obj['ScapBack'] != self.obj['SeqList']:
             scapPass(self.obj['ScapBack'],'bg','SCAP processing Background to %s: %.2f%%',
                      'SCAP processed Background to %s for %s sequences.')
         if markov.opt['Sorted']: self.printLog('#SCAP','Sorted SCAP run complete')
         else: self.printLog('#SCAP','UnSorted SCAP run complete')
     except: self.errorLog(rje_zen.Zen().wisdom())
Пример #5
0
    def hmmTable(self,outfile='',append=False,delimit=None):    ### Outputs results table
        '''
        Outputs results table: one row per alignment of each hit of each search in self.search.
        >> outfile:str = Name of output file [self.info['HMMTab'], else "<SearchDB base>.hmmer.<ext>"]
        >> append:boolean = whether to append to an existing file rather than starting a new one (with backup)
        >> delimit:str = Delimiter to use [\t]
        << returns False if the output file is "None" (no table made). Otherwise nothing is returned.
        Column names depend on self.opt['MySQL']: HMM/Hit/Hit_Start/Hit_End or Type/Name/Start/End (plus Eval/Score).
        Errors are logged and re-raised.
        '''
        try:
            ### Setup ###
            if not outfile: outfile = self.info['HMMTab']
            if outfile.lower() == 'none':
                self.log.printLog('#TAB','HMMTab = "None": No table output')
                return False
            if not delimit: delimit = rje.getDelimit(self.cmd_list,'\t')
            if not outfile:
                dbase = rje.baseFile(self.info['SearchDB'],True)
                outfile = '%s.hmmer.%s' % (dbase,rje.delimitExt(delimit))
            self.readResults()
            self.log.printLog('#TAB','Tabulating results for %s searches into %s' % (len(self.search),outfile),log=False)

            ### Setup Resfile ###
            if not self.opt['MySQL']: headers = ['Type','Name','Start','End','Eval','Score']
            else: headers = ['HMM','Hit','Hit_Start','Hit_End','Eval','Score']
            if not (append and os.path.exists(outfile)):    # New file (or overwrite): write the header row
                rje.delimitedFileOutput(self,outfile,headers,delimit,rje_backup=True)

            ### Output Search details ###
            for search in self.search:
                for hit in search.hit:
                    for aln in hit.aln:
                        (hmmname,hitname) = (search.info['Name'],hit.info['Name'])
                        (start,end) = ('%d' % aln.stat['SbjStart'],'%d' % aln.stat['SbjEnd'])
                        # Both naming schemes are filled in: the headers decide which keys are actually output
                        out = {'HMM':hmmname,'Type':hmmname,'Name':hitname,'Hit':hitname,
                               'Start':start,'End':end,'Hit_Start':start,'Hit_End':end}
                        out['Eval'] = '%.2e' % aln.stat['Expect']
                        out['Score'] = '%.1f' % aln.stat['BitScore']
                        rje.delimitedFileOutput(self,outfile,headers,delimit,out)
            self.log.printLog('#OUT','Results for %s searches output to %s.' % (len(self.search),outfile))
        except:
            self.log.errorLog('Fatal Error during hmmTable(%s).' % outfile)
            raise
Пример #6
0
    def mapPhosByBLAST(self,fasfile):   ### BLAST sequences against phosphoDB, align hits & mark sites (ID & Homology)
        '''
        BLAST sequences against phosphoDB, align hits and mark phosphosites (ID & Homology).
        >> fasfile:str = fasta file of query sequences, loaded into a SeqList and used as the BLAST input file
        << nothing is returned (None), whether the method completes or an error is caught and logged
        For each BLAST search, the query and its retained hits are aligned with aln.muscleAln(). Known phosphosite
        positions of each hit (self.dict['PhosphoSites'], keyed by accession number) are then walked along the
        alignment. Where a site column holds the same residue in query and hit, a row is written to
        self.info['PhosRes'] with Evidence "Self", "ID" or "Hom" and self.addPhos() is called for that column.
        Each alignment is saved under the "PhosALN" directory.
        '''
        try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            ## ~ [1a] Setup fasfile ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            scmd = self.cmd_list + ['seqin=%s' % fasfile,'autoload=T','autofilter=F']
            qseqlist = rje_seq.SeqList(self.log,scmd)
            qdict = qseqlist.seqNameDic()
            ## ~ [1b] Setup results files/directories ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            basefile = rje.baseFile(fasfile)
            # Default results file is named after the input fasta file
            if self.info['PhosRes'].lower() in ['','none']: self.info['PhosRes'] = '%s.phosres.tdt' % basefile
            headers = ['Name','Pos','AA','PELM','PELMPos','Evidence']
            delimit = rje.getDelimit(self.cmd_list,rje.delimitFromExt(filename=self.info['PhosRes']))
            rje.delimitedFileOutput(self,self.info['PhosRes'],headers,delimit,rje_backup=True)
            ppath = rje.makePath('PhosALN')
            rje.mkDir(self,ppath)
            ## ~ [1c] Setup BLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            pblast = rje_blast.BLASTRun(self.log,self.cmd_list+['formatdb=F'])
            pblast.setInfo({'Name':'%s.p.blast' % rje.baseFile(fasfile),'DBase':self.info['PELMFas'],'InFile':fasfile})
            pblast.setStat({'HitAln':pblast.stat['OneLine']})
            pblast.opt['Complexity Filter'] = False
            pblast.formatDB(force=False)
            ## ~ [1d] Setup GABLAM Stats ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            gkey = 'GABLAMO ID' #x# % self.info['GABLAMO Key']
            # IDSim/HomSim below 1.0 are multiplied by 100 (presumably fractions given instead of percentages -
            # TODO confirm) and negative values are raised to 0.0. NOTE: this rewrites self.stat on every call.
            for g in ['ID','Hom']:
                if self.stat['%sSim' % g] < 1.0: self.stat['%sSim' % g] *= 100.0
                self.stat['%sSim' % g] = max(0.0,self.stat['%sSim' % g])

            ### ~ [2] PhosphoBLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            pblast.blast(use_existing=True,log=True)    # BLAST
            pblast.readBLAST(gablam=True)               # Read in
            # Searches are popped off as they are processed, so pblast.search is empty once the loop finishes
            while pblast.search:
                ## ~ [2a] Align relevant hits from each BLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                search = pblast.search.pop(0)
                qseq = qdict[search.info['Name']]
                idlist = []     # Sequences whose phosphosites count as "ID" (or "Self") evidence
                qlen = qseq.aaLen()
                hitdict = search.hitSeq(self.obj['SeqList'])
                aln = rje_seq.SeqList(self.log,self.cmd_list+['autoload=F','autofilter=F'])
                aln.seq = [qseq]
                pdict = {}      # Dictionary of {hseq:[poslist]}
                rdict = {qseq:0}      # Dictionary of {hseq:res}
                for hit in search.hit[0:]:
                    hseq = hitdict[hit]
                    pdict[hseq] = []
                    for pos in rje.sortKeys(self.dict['PhosphoSites'][hseq.info['AccNum']]): pdict[hseq].append(pos)
                    # Hit with the same name as the query: its sites are stored under the query sequence itself
                    if hit.info['Name'] == search.info['Name']:
                        if qseq.getSequence(case=False,gaps=False) != hseq.getSequence(case=False,gaps=False):
                            self.log.errorLog('Major problem: Search/Hit sequence mismatch for same sequence "%s"' % hit.info['Name'])
                        idlist.append(qseq)
                        pdict[qseq] = pdict.pop(hseq)
                        continue
                    gdict = hit.globalFromLocal(qlen)
                    # qvh = query-versus-hit GABLAM value as a percentage of the query length
                    qvh = float(100 * gdict['Query'][gkey]) / float(qlen)
                    if qvh < self.stat['HomSim']:   # Below the homology threshold: hit is dropped altogether
                        pdict.pop(hseq)
                        continue
                    aln.seq.append(hseq)
                    # "ID" evidence needs the IDSim threshold and (unless UseSpec is off) the same species as the query
                    if (qseq.sameSpec(hseq) or not self.opt['UseSpec']) and qvh >= self.stat['IDSim']: idlist.append(hseq)
                    rdict[hseq] = 0
                aln.muscleAln()   #x#outfile='%s%s.phosaln.fas' % (ppath,qseq.info['AccNum']))
                # Extra all-gap sequence is added last: it is the aln.seq[-1] handed to self.addPhos() below
                aln._addSeq('PhosAln','-' * qseq.seqLen())
                aln.info['Name'] = '%s%s.phosaln.fas' % (ppath,qseq.info['AccNum'])
                ## ~ [2b] Map phosphorylations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                # NOTE(review): Python 2 print statement writing straight to stdout - looks like leftover debugging
                # output rather than logging; confirm whether it is still wanted.
                print '>>>\n', aln.seq, pdict.keys(), rdict.keys()
                # Walk the alignment columns, keeping a running (1-based) residue count per sequence in rdict
                for a in range(qseq.seqLen()):
                    if qseq.info['Sequence'][a] != '-': rdict[qseq] += 1
                    for hseq in pdict:
                        if hseq.info['Sequence'][a] == '-': continue
                        if hseq != qseq: rdict[hseq] += 1   # The query's own count was already incremented above
                        if rdict[hseq] in pdict[hseq] and qseq.info['Sequence'][a] == hseq.info['Sequence'][a]:  # Phosphosite
                            pdata = {'Name':search.info['Name'],'Pos':rdict[qseq],'AA':qseq.info['Sequence'][a],
                                     'PELM':hseq.shortName(),'PELMPos':rdict[hseq],'Evidence':'Hom'}
                            if hseq == qseq: pdata['Evidence'] = 'Self'
                            elif hseq in idlist: pdata['Evidence'] = 'ID'
                            rje.delimitedFileOutput(self,self.info['PhosRes'],headers,delimit,pdata)
                            self.addPhos(aln.seq[-1],a,pdata['Evidence'])
                ## ~ [2c] Add Scansite/NetPhos if made? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                ## ~ [2d] Save alignment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                aln.saveFasta()


            # Align hits for each > X %ID
            # Map phosphosites onto alignment and output #
            
            return
        except: self.log.errorLog('Problem during PhosphoSeq.mapPhosByBLAST')
Пример #7
0
    def mapSeq(self,seqlist,blast,search,outputmap=True): ### Performs actual mapping of sequence
        '''
        Performs actual mapping of sequence.
        >> seqlist:SeqList object whose current sequence (seqlist.getSeq()) is the one to be mapped
        >> blast:BLAST_Run object holding the BLAST 'Hit' table and the search database name
        >> search:str = Name of the current query, used to pull its hits out of the BLAST 'Hit' table
        >> outputmap:boolean = Whether to output mapping into files (MapFas/MissFas/MapRes) [True]
        << returns shortName() of mapped sequence (None if not mapped; False if an error was caught)
        Each method in self.list['Mapping'] is tried in turn with self.mapHit() until one returns a hit. If none
        does and 'grep' is listed, the query sequence is searched for directly in the database file with grep.
        Queries that remain unmapped are added to MapFas if self.bool['Combine'], else to MissFas.
        '''
        seq = None      # Defined before the try block so that the error handler can never hit an unbound name

        def appendFasta(filename,name,sequence):    ### Appends one fasta record to filename and closes the file
            FAS = open(filename,'a')
            try: FAS.write('>%s\n%s\n' % (name,sequence))
            finally: FAS.close()

        try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            seq = seqlist.getSeq(format='tuple')    # (name,sequence) tuple
            mapseq = self.obj['MapDB']
            hits = blast.db('Hit').indexEntries('Query',search)
            self.printLog('#HITS','%s vs %s = %d hits' % (search,blast.str['DBase'],len(hits)))
            hitseq = {}; hitdata = {}
            for entry in hits:
                hitseq[entry['Hit']] = mapseq.getDictSeq(entry['Hit'],format='tuple')
                hitdata[entry['Hit']] = entry
            resdict = {'Query':search,'Hit':None,'Method':'Failed','Query_Species':rje_sequence.specCodeFromName(seq[0])}
            ### ~ [1] Order Hits and Check Species ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            (hits,hitdict) = self.orderHits(seq,hits,hitseq)
            self.debug(hits)
            self.debug(hitdict)
            ### ~ [2] Attempt mapping ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            for method in self.list['Mapping']:
                resdict['Hit'] = self.mapHit(seq,hits,hitdict,method.lower())
                if resdict['Hit']:
                    resdict['Method'] = method[:1].upper() + method[1:].lower()
                    break
                elif method.lower() == 'gablam' and (len(hits) > 0):    # Same case-folding as used for mapHit()
                    resdict['Method'] = 'Rejected'
            self.debug(resdict)
            ### ~[3] Output! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            if resdict['Hit']:  #hitdict[hit]['Data']['ShortName']
                hit = resdict['Hit']['Hit']     # resdict['Hit'] is the BLAST table entry for Hit
                shortname = hitdict[hit]['Data']['ShortName']   # This is just hit!
                self.printLog('#MAP','%s mapped to %s (by %s)' % (string.split(seq[0])[0],shortname,resdict['Method']))
                ## Update Stats ##
                self.debug('')
                resdict['BlastRank'] = hitdata[hit]['Rank']
                for key in hitdict[hit]: resdict[key] = hitdict[hit][key]
                ## Fasta and Redundancy ##
                if shortname in self.list['Mapped']: self.printLog('#MAP','%s already mapped before - not duplicating in %s' % (shortname,self.str['MapFas']))
                else:
                    self.list['Mapped'].append(shortname)
                    if outputmap: appendFasta(self.str['MapFas'],hitseq[hit][0],hitseq[hit][1])
                resdict['Hit_Species'] = hitdict[hit]['Data']['SpecCode']
                resdict['Hit'] = shortname
            else:
                ### ~ [2] GREP-based search ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
                if 'grep' in self.list['Mapping']:
                    greplist = []; grepseq = ''
                    # NOTE(review): the command is built by string formatting and run through a shell, so a
                    # database path containing spaces or shell characters would break it - confirm inputs are safe.
                    self.printLog('#GREP','grep %s %s -B 1' % (seq[1],blast.str['DBase']),log=False)
                    for line in os.popen('grep %s %s -B 1' % (seq[1],blast.str['DBase'])).readlines():
                        if line[:1] == '>': greplist.append(string.split(line[1:])[0])
                        elif not grepseq: grepseq = rje.chomp(line)   # First non-header line only
                    if greplist:
                        shortname = greplist.pop(0)
                        resdict['Hit'] = shortname
                        resdict['Method'] = 'Grep'
                        resdict['Qry_ID'] = '100.0'
                        resdict['Qry_Len'] = len(seq[1])
                        resdict['Hit_Len'] = len(grepseq)
                        # NOTE(review): grep matches lines containing the query, so this ratio is >= 100.
                        # Possibly intended as 100.0 * len(query) / len(hit) - confirm before changing.
                        resdict['Hit_ID'] = 100.0 * len(grepseq) / len(seq[1])
                        try: resdict['Hit_Species'] = string.split(shortname,'_')[1]
                        except: pass    # Name without a "_" has no species code to extract
                        if shortname in self.list['Mapped']:
                            self.printLog('#MAP','%s already mapped before - not duplicating in %s' % (shortname,self.str['MapFas']))
                        else:
                            self.list['Mapped'].append(shortname)
                            if outputmap: appendFasta(self.str['MapFas'],shortname,grepseq)
                    for extra in greplist: self.printLog('#GREP','Warning! Query "%s" also hit "%s" with grep!' % (string.split(seq[0])[0],extra))
                if resdict['Hit']:
                    # Mapped by grep: report as mapped. (Previously such queries were also written to MissFas and
                    # logged as #MISS, because the "missing" branch below was reached for any grep hit.)
                    self.printLog('#MAP','%s mapped to %s (by %s)' % (string.split(seq[0])[0],resdict['Hit'],resdict['Method']))
                else:
                    if self.bool['Combine']:
                        ## Fasta and Redundancy ##
                        shortname = string.split(seq[0])[0]
                        if shortname in self.list['Mapped']:
                            self.printLog('#FAS','%s already in output - not duplicating in %s' % (shortname,self.str['MapFas']))
                        else:
                            self.list['Mapped'].append(shortname)
                            if outputmap: appendFasta(self.str['MapFas'],seq[0],seq[1])
                    elif outputmap: appendFasta(self.str['MissFas'],seq[0],seq[1])
                    self.printLog('#MISS','%s mapping %s' % (resdict['Query'],resdict['Method']))
            if outputmap:
                rje.delimitedFileOutput(self,self.str['MapRes'],self.list['Headers'],rje.getDelimit(self.cmd_list),resdict)
            return resdict['Hit']

        except:
            if seq: seqname = seq[0]
            else: seqname = search      # Sequence was never retrieved: fall back on the search name
            self.errorLog('Fudgesticks! SeqMapper.mapSeq(%s) has died!' % seqname,quitchoice=True)
            return False
Пример #8
0
 def run(self,imenu=False,outputmap=True,returndict=False):      ### Main controlling run Method
     '''
     Main controlling run Method.
     >> imenu:boolean = Whether to initiate interactive menu if appropriate [False].
     >> outputmap:boolean = Whether to output mapping into a file [True]
     >> returndict:boolean = Whether to return a dictionary of {searchname:mappedname} (no previous mapping) [False]
     << returns {} if no sequences were loaded, the mapping dictionary if returndict, else nothing (None).
     Input sequences are BLASTed against MapDB and each is then mapped in turn with self.mapSeq(). If StartFrom is
     set, sequences are skipped until that name is reached. Sequences already in self.list['Mapped'] are reported
     as "Already Mapped!" and not mapped again. Errors are logged and re-raised.
     '''
     try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not self.setup(imenu): raise ValueError
         seqlist = rje_seqlist.SeqList(self.log,self.cmd_list+['autoload=T','seqmode=file'])
         if not seqlist.seqNum(): self.warnLog('No sequences loaded for mapping.'); return {}
         ## ~ [0a] Setup BLAST Search ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         blast = rje_blast.BLASTRun(self.log,['blaste=1e-4','blastv=20','blastf=F']+self.cmd_list+['v=-1'])
         blast.setStr({'DBase':self.getStr('MapDB'),'Type':'blastp','InFile':self.getStr('SeqIn'),
                      'Name':'%s-%s.blast' % (rje.baseFile(self.str['SeqIn'],True),rje.baseFile(self.str['MapDB'],True))})
         blast.setStat({'HitAln':blast.getStat('OneLine')})
         blast.list['ResTab'] = ['Search','Hit','GABLAM']
         if seqlist.nt(): blast.str['Type'] = 'blastx'   # Nucleotide queries are searched with blastx
         ## ~ [0b] Setup Output ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if outputmap: self._setupOutput()                           ## Output Files ##
         if returndict: mapdict = {}
         else: self._setupMapped()                                   ## Previously Mapped Sequences ##
         seqx = seqlist.seqNum()             ## Number of sequences ##
         ### ~ [1] BLAST Search Mapping ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.printLog('#BLAST','BLASTing %s vs %s.\n *** This could take some time if files are large. Please be patient! ***' % (self.str['SeqIn'],self.str['MapDB']),log=False)
         ## ~ [1a] Perform BLAST Unless it exists ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         blast.run(format=True)
         self.obj['DB'] = blast.obj['DB']
         ## ~ [1b] Mapping from searches ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         self.debug(self.getStr('MapDB'))
         self.obj['MapDB'] = rje_seqlist.SeqList(self.log,self.cmd_list+['autoload=F','seqmode=file','seqin=%s' % self.str['MapDB']])
         self.obj['MapDB'].loadSeq(self.getStr('MapDB'))
         self.debug('%s' % self.obj['MapDB'].list['Seq'])
         sx = 0
         while seqlist.nextSeq() is not None:
             search = seqlist.getSeq(format='short')
             sx += 1
             ## Check StartFrom ##
             if self.str['StartFrom']:
                 if self.str['StartFrom'] != search:
                     self.progLog('\r#SKIP','Looking for %s: skipping %d seqs' % (self.str['StartFrom'],sx))
                     continue
                 # Log BEFORE clearing StartFrom, otherwise the name reported is always blank. The current
                 # sequence (number sx) is the one being started from, so sx-1 sequences were skipped.
                 self.printLog('\r#SKIP','Starting from %s: skipped %d seqs' % (self.str['StartFrom'],sx-1))
                 self.str['StartFrom'] = ''
             ## Check if in Mapped ##
             if search in self.list['Mapped']:
                 resdict = {'Query':search,'Hit':search,'Method':'Already Mapped!'}
                 self.printLog('#FAS','%s already in output - not duplicating in %s' % (search,self.str['MapFas']))
                 # MapRes and its headers are only set up by _setupOutput(), i.e. when outputmap is on
                 if outputmap: rje.delimitedFileOutput(self,self.str['MapRes'],self.list['Headers'],rje.getDelimit(self.cmd_list),resdict)
                 continue
             ### Map Sequence ###
             self.printLog('#MAP','Mapping %s seqs: %s of %s' % (self.str['SeqIn'],rje.integerString(sx),rje.integerString(seqx)))
             # outputmap is passed on: mapSeq() defaults to True and would otherwise write files regardless
             mapname = self.mapSeq(seqlist,blast,search,outputmap=outputmap)
             if returndict: mapdict[search] = mapname
         ### ~ [2] Finish ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.printLog('#MAP','Mapping of %s (%s seqs) complete.' % (self.str['SeqIn'],rje.integerString(seqx)))
         if os.path.exists(blast.str['Name']) and not (self.getBool('DeBug') or self.test()): os.unlink(blast.str['Name'])     #!# Add option to keep BLAST! #!#
         if returndict: return mapdict
     except: self.errorLog('Error in SeqMapper.run()',printerror=True,quitchoice=True); raise
Пример #9
0
 def _pepDis(self):      ### Peptide Distance
     '''
     Peptide Distance.
     Loads sequences (autoload=T), calculates a distance for every ordered pair (seq1 at or before seq2 in the list)
     using the method named in self.info['Method'] and saves the symmetric matrix to
     "<sequence file base>.<Method>.<ext>". Methods handled: ds_prop, ds_id, tot_prop, pam and best_prop. Any other
     method leaves every distance at 0. With self.info['OutMatrix'] = 'phylip', self-distances are fixed at 0 and
     output is space-delimited phylip format. Errors are logged and re-raised.
     '''
     try:
         ### <0> ### Setup
         seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
         dismatrix = rje_dismatrix.DisMatrix(self.log,self.cmd_list)
         method = self.info['Method']
         dismatrix.info['Name'] = method
         dismatrix.opt['Symmetric'] = True
         if method in ['ds_prop','tot_prop','best_prop']:    # Property-based methods need the difference matrix
             aaprop = rje_aaprop.AAPropMatrix(self.log,self.cmd_list)
             #aaprop.readAAProp()
             aaprop.makePropDif()
         elif method == 'pam':
             pam = rje_pam.PamCtrl(log=self.log,cmd_list=self.cmd_list)
         ### <1> ### Make DisMatrix
         peptides = seqlist.seq
         for seq1 in peptides:
             pos1 = peptides.index(seq1)
             for seq2 in peptides:
                 if pos1 > peptides.index(seq2): continue    # No need to calculate - symmetrical!
                 dis = 0
                 if seq1 == seq2 and self.info['OutMatrix'] == 'phylip':
                     pass    # Self-distance stays at zero for phylip output
                 elif method in ['ds_prop','ds_id']:
                     ## Position-weighted comparison, corrected by the mean of the two self-comparisons ##
                     (self_dis1,self_dis2) = (0,0)
                     len1 = seq1.seqLen()
                     for r1 in range(len1):
                         for r2 in range(r1,seq2.seqLen()):
                             a1 = seq1.info['Sequence'][r1]
                             a2 = seq2.info['Sequence'][r2]
                             s1 = seq1.info['Sequence'][r2]
                             s2 = seq2.info['Sequence'][r1]
                             weight = len1 - (r2 - r1)   # Weight falls as the two positions move apart
                             if method == 'ds_prop':
                                 dis += (aaprop.pdif['%s%s' % (a1,a2)] * weight)
                                 self_dis1 += (aaprop.pdif['%s%s' % (a1,s1)] * weight)
                                 self_dis2 += (aaprop.pdif['%s%s' % (a2,s2)] * weight)
                             else:   # ds_id: count weighted mismatches instead of property differences
                                 if a1 != a2: dis += weight
                                 if a1 != s1: self_dis1 += weight
                                 if a2 != s2: self_dis2 += weight
                     dis -= (self_dis1 + self_dis2) / 2.0
                 elif method == 'tot_prop':
                     ## Sum of absolute differences in per-property totals ##
                     propkeys = aaprop.prop.keys()
                     proptot = {}
                     for prop in propkeys:
                         proptot[prop] = {seq1:0.0,seq2:0.0}
                     for seq in [seq1,seq2]:
                         for r in range(seq.seqLen()):
                             aa = seq.info['Sequence'][r]
                             for prop in propkeys:
                                 proptot[prop][seq] += string.atof(aaprop.prop[prop][aa])
                     for prop in propkeys:
                         dis += abs(proptot[prop][seq1] - proptot[prop][seq2])
                 elif method == 'pam':
                     dis = pam.pamML(ancseq=seq1.info['Sequence'],descseq=seq2.info['Sequence'])
                 elif method == 'best_prop':
                     ## Smallest summed property difference over all circular rotations of seq2 ##
                     len1 = seq1.seqLen()
                     min_dis = len1 * len(aaprop.prop)
                     pepseq1 = seq1.info['Sequence']
                     for c in range(len1):  # Circular start
                         pepseq2 = seq2.info['Sequence'][c:] + seq2.info['Sequence'][:c]
                         rotdis = sum(aaprop.pdif['%s%s' % (pepseq1[r],pepseq2[r])] for r in range(len1))
                         if rotdis < min_dis:
                             min_dis = rotdis
                     dis = min_dis
                 dismatrix.addDis(seq1,seq2,dis)
         ### <2> ### Output
         if self.info['OutMatrix'] == 'phylip':
             (delimit,outformat) = (' ','phylip')
         else:
             (delimit,outformat) = (rje.getDelimit(self.cmd_list,','),'None')
         outfile = '%s.%s.%s' % (rje.baseFile(seqlist.info['Name'],True),method,rje.delimitExt(delimit))
         dismatrix.saveMatrix(seqlist.seq,outfile,delimit,format=outformat)

     except:
         self.log.errorLog('Error in _pepDis',printerror=True,quitchoice=False)
         raise   # Delete this if method error not terrible
Пример #10
0
	def run(self):		### Main Run method
		'''
		Main Run method. Performs the first applicable mode of:
		- SLiMDisc (self.opt['SLiMDisc']): hands over to self.slimDisc() and returns its result.
		- TEIRESIAS (self.opt['Teiresias']): saves the input sequences as fasta, runs the TEIRESIAS command (unless existing
		  output is being reused), reads the patterns from its output, scores them and writes delimited results - either
		  one combined file (self.info['Name']) or, if self.opt['MySQL'], separate *.patterns and *.occ tables.
		- InfoContent (self.info['Info'] != 'None'): loads motifs from the self.info['Info'] file and writes one
		  information content score per motif to a *.info.* file.
		Returns the self.slimDisc() result in SLiMDisc mode, False if the InfoContent input file is missing, else None.
		Any exception is reported through self.log.errorLog() and then re-raised.
		'''
		try:
			### SLiMDisc Run ###
			if self.opt['SLiMDisc']:
				return self.slimDisc()
			
			### TEIRESIAS ###
			if self.opt['Teiresias']:
				## Setup ##
				seqlist = rje_seq.SeqList(self.log,self.cmd_list)
				infile = '%s.teiresias.fas' % rje.baseFile(seqlist.info['Name'],True)
				outfile = '%s.teiresias.out' % rje.baseFile(seqlist.info['Name'],True)
				run_teiresias = True
				if rje.isYounger(outfile,infile) == outfile:
					# Existing output is reused unless an interactive user asks for regeneration
					if self.stat['Interactive'] < 1 or not rje.yesNo('%s and %s exist already. Regenerate?' % (infile,outfile),'N'):
						run_teiresias = False
				## Run TEIRESIAS ##
				if run_teiresias:
					seqlist.saveFasta(seqfile=infile,name='Teiresias')	### Saves sequences in fasta format
					command = rje.makePath(self.info['TeiresiasPath'],True)
					command += ' -i%s -o%s %s' % (infile,outfile,self.info['TeiresiasOpt'])
					self.log.printLog('#CMD',command)
					os.system(command)
				## Read Results ##
				self.verbose(0,2,'Reading TEIRESIAS output from %s...' % outfile,1)
				self.list['Pattern'] = []
				pat_regex = r'^(\d+)\s+(\d+)\s+(\S+)\s+(\d.+\d)$'
				RESULTS = open(outfile,'r')
				try:
					for line in RESULTS:
						pat_data = rje.matchExp(pat_regex,line)	# Matched once per line and reused
						if pat_data: # New pattern
							self.addTeiresiasPattern(pat_data)
						elif len(line) > 3 and line[0] != '#':
							self.log.errorLog('Did not recognise line: %s' % line,False,False)
				finally:
					RESULTS.close()
				patx = len(self.list['Pattern'])
				self.log.printLog('#PAT','%s TEIRESIAS patterns read from %s.' % (rje.integerString(patx),outfile))
				## Calculate Information Content ##
				aafreq = seqlist.aaFreq()
				self.verbose(0,3,'Calculating Information Content & Length stats...',0)
				occx = 0
				for pattern in self.list['Pattern']:
					pattern.stat['Info'] = self.calculateScore(pattern.info['Pattern'],aafreq)
					pattern._makeLength()
					occx += 1
					rje.progressPrint(self,occx,patx/100,patx/10)
				self.verbose(0,1,'...Done!',2)
				## Prepare Results ##
				delimit = rje.getDelimit(self.cmd_list)
				if self.info['Name'] == 'None':
					self.info['Name'] = '%s.teiresias.%s' % (rje.baseFile(seqlist.info['Name'],True),rje.delimitExt(delimit))
				mysql = self.opt['MySQL']
				append = self.opt['Append']
				if append:
					filemode = 'a'
				else:
					filemode = 'w'
				PATFILE = OCCFILE = RESFILE = None
				try:
					if mysql:	# Two tables
						(namebase,nameext) = os.path.splitext(self.info['Name'])
						occfile = '%s.occ%s' % (namebase,nameext)
						patfile = '%s.patterns%s' % (namebase,nameext)
						PATFILE = open(patfile,filemode)
						# Occurrence table now uses the same mode as the pattern table (was always appended to)
						OCCFILE = open(occfile,filemode)
						if not append:	# Headers only go into fresh files
							rje.writeDelimit(PATFILE,['pattern','tot_occ','seq_occ','info','len','fix','wild'],delimit)
							rje.writeDelimit(OCCFILE,['seq_id','pos','pattern','pat_match'],delimit)
					else:
						# Single combined table: always self.info['Name'] (the non-append path used an undefined name)
						RESFILE = open(self.info['Name'],filemode)
						if not append:
							rje.writeDelimit(RESFILE,['Sequence Name','Position','Pattern','Match','Total Occurrences','Num Sequences','Information Content','Length','Fixed','Wildcard'],delimit)
					## Save Results ##
					occx = 0
					for pattern in self.list['Pattern']:
						patstats = []
						for stat in ['OccCount','SeqCount','Info','Length','Fixed','Wildcards']:
							patstats.append('%d' % pattern.stat[stat])
						patstats[2] = '%.3f' % pattern.stat['Info']	# Info is reported to 3 d.p. rather than as an integer
						if mysql:	# Two tables
							rje.writeDelimit(PATFILE,[pattern.info['Pattern']] + patstats,delimit)
						for occ in rje.sortKeys(pattern.occ):
							seq = seqlist.seq[occ]
							for pos in pattern.occ[occ]:
								match = seq.info['Sequence'][pos:(pos+pattern.stat['Length'])]
								outlist = [seq.shortName(),'%d' % pos,pattern.info['Pattern'],match]
								if mysql:	# Two tables
									rje.writeDelimit(OCCFILE,outlist,delimit)
								else:
									rje.writeDelimit(RESFILE,outlist+patstats,delimit)
								occx += 1
				finally:
					# Close whatever was opened, even if writing failed part-way
					for handle in [PATFILE,OCCFILE,RESFILE]:
						if handle is not None:
							handle.close()
				if mysql:	# Two tables
					self.log.printLog('#OUT','%s patterns output to %s.' % (rje.integerString(patx),patfile))
					self.log.printLog('#OUT','%s pattern occurrences output to %s.' % (rje.integerString(occx),occfile))
				else:
					self.log.printLog('#OUT','%s occurrences of %s patterns output to %s.' %
									  (rje.integerString(occx),rje.integerString(patx),self.info['Name']))

			### InfoContent ###
			elif self.info['Info'] != 'None':
				## Setup ##
				alphabet = rje_seq.alph_protx 
				if not os.path.exists(self.info['Info']):
					self.log.errorLog('Input file %s missing!' % self.info['Info'],False,False)
					return False
				mypresto = presto.Presto(self.log,self.cmd_list)
				mypresto.loadMotifs(file=self.info['Info'],clear=True)
				seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
				if seqlist.seqNum() > 0:
					aafreq = seqlist.aaFreq(alphabet=None,fromfile=None,loadfile=None,total=False)  ### Returns dictionary of AA (& gap etc.) frequencies
				else:
					# No sequences loaded: fall back on equal frequencies across the default alphabet
					aafreq = {}
					for aa in alphabet:
						aafreq[aa] = 1.0 / len(alphabet)
				alphabet = aafreq.keys()
				maxinfo = 0 
				for aa in alphabet:
					# NOTE(review): math.log raises ValueError for a zero frequency - confirm aaFreq() never returns one
					maxinfo +=  (aafreq[aa] * math.log(aafreq[aa],2))
				## Output ##
				delimit = rje.getDelimit(self.cmd_list)
				ext = rje.delimitExt(delimit)
				outfile = '%s.info.%s' % (rje.baseFile(self.info['Info'],True,['.txt','.%s' % ext]),ext)
				if self.opt['Append']:
					OUTFILE = open(outfile,'a')
				else:
					OUTFILE = open(outfile,'w')
				try:
					if not self.opt['Append']:
						rje.writeDelimit(OUTFILE,['motif','pattern','info'],delimit)
					## Calculate Information Scores ##
					for motif in mypresto.motif:
						self.verbose(2,4,motif.info['Sequence'],0)
						pattern = ''
						for el in motif.info['Sequence'].replace('X','.').split('-'):
							if el.find('.{') == 0:	# Ambiguous spacer length - compress
								pattern += '.'
							else:
								pattern += el
						self.verbose(2,2,'=> %s' % pattern,1)
						motif.stat['Info'] = self.calculateInformationContent(pattern,aafreq,maxinfo,self.stat['InfoGapPen'])
						self.verbose(0,3,'%s (%s) = %.2f' % (motif.info['Name'],pattern,motif.stat['Info']),1)
						## Output ##
						rje.writeDelimit(OUTFILE,[motif.info['Name'],pattern,'%.2f' % motif.stat['Info']],delimit)
				finally:
					## Finish ##
					OUTFILE.close()
		except:
			self.log.errorLog('Error in run().',printerror=True,quitchoice=False)
			raise	# Delete this if method error not terrible
Пример #11
0
    def _pepStats(self):      ### Peptide Distance
        '''
        Peptide property statistics. Loads the sequences (autoload=T) and the amino acid property matrix built from
        self.cmd_list, then writes one delimited row per sequence to hrb.pepstats.* containing:
        - the summed value of each property in aaprop.prop (sorted property order) over the residues of the peptide;
        - net_charge: sum of Positive minus Negative over the residues;
        - charge_balance & hydrophobic_balance: the Charged/Hydrophobic value of each residue weighted by 1/(r+1),
          minus the same value weighted by 1/(L-r), where r is the 0-based position and L the peptide length.
        Property values are converted to integers in place in aaprop.prop (except for '-' and 'X').
        Any exception is reported through self.log.errorLog() and then re-raised.
        '''
        try:
            ### Setup ###
            seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
            aaprop = rje_aaprop.AAPropMatrix(self.log,self.cmd_list)
            aaprop.makePropDif()
            delimit = rje.getDelimit(self.cmd_list)

            ### Output File Setup ###
            OUTFILE = open('hrb.pepstats.%s' % rje.delimitExt(delimit),'w')
            try:
                headlist = ['peptide']
                proplist = rje.sortKeys(aaprop.prop)
                ## 10 Dimensional Peptide Property Output ##
                for prop in proplist:
                    headlist.append(prop.lower())
                    for aa in aaprop.prop[prop].keys():
                        if aa in ['-','X']:
                            # NOTE(review): these entries are left unconverted, so a peptide containing '-' or 'X'
                            # will presumably fail in the integer sums below - confirm inputs never contain them
                            continue
                        try:
                            aaprop.prop[prop][aa] = string.atoi(aaprop.prop[prop][aa])
                        except:
                            # Report the offending entry before passing the error on (same output as a print statement)
                            print('%s %s %s %s' % (aaprop.prop, prop, aa, aaprop.prop[prop][aa]))
                            raise
                ## Additional Stats ##
                headlist.append('net_charge')
                #headlist.append('hydrophobicity')
                headlist.append('charge_balance')
                headlist.append('hydrophobic_balance')
                #headlist.append('hydrophobicity_balance')
                ## Output
                rje.writeDelimit(OUTFILE,headlist,delimit)

                ### Calculate stats ###
                for pep in seqlist.seq:
                    pepname = pep.shortName()
                    shortmatch = rje.matchExp(r'^(\S+_\d[CQ])',pepname)
                    if shortmatch:
                        pepname = shortmatch[0]
                    outlist = [pepname]
                    pepseq = pep.info['Sequence']
                    ## 10 Dimensional Peptide Property Output ##
                    for prop in proplist:
                        px = 0
                        for aa in pepseq:
                            px += aaprop.prop[prop][aa]
                        outlist.append('%d' % px)
                    ## Additional Stats ##
                    net_charge = 0
                    for aa in pepseq:
                        net_charge += (aaprop.prop['Positive'][aa] - aaprop.prop['Negative'][aa])
                    outlist.append('%d' % net_charge)
                    charge_balance = 0
                    hydrophobic_balance = 0
                    # Weights use the actual peptide length: the former hard-coded 10 divided by zero at r == 10
                    # for longer peptides. Results for 10-residue peptides are unchanged.
                    peplen = len(pepseq)
                    for r in range(peplen):
                        start_weight = 1.0 / (r+1)
                        end_weight = 1.0 / (peplen-r)
                        charge_balance += aaprop.prop['Charged'][pepseq[r]] * start_weight
                        charge_balance -= aaprop.prop['Charged'][pepseq[r]] * end_weight
                        hydrophobic_balance += aaprop.prop['Hydrophobic'][pepseq[r]] * start_weight
                        hydrophobic_balance -= aaprop.prop['Hydrophobic'][pepseq[r]] * end_weight
                    outlist.append('%.3f' % charge_balance)
                    outlist.append('%.3f' % hydrophobic_balance)
                    rje.writeDelimit(OUTFILE,outlist,delimit)
            finally:
                ### Finish ###
                OUTFILE.close()     # Closed even if a calculation fails part-way

        except:
            self.log.errorLog('Error in _pepStats',printerror=True,quitchoice=False)
            raise   # Delete this if method error not terrible
Пример #12
0
def badasp(out,mainlog,cmd_list,tree=None): ### Main BADASP Method
    '''
    Main BADASP Method. Automated run if interactive < 1
    <1> Load Sequences and Tree
    <2> Define Subfamilies
    <3> GASP Ancestral Sequence Prediction
    <4> Peform Functional Specificity and Sequence Conservation Calculations
    <5> Output Results
    <6> Optional filtered (partial) results output, offered in interactive runs only
    Arguments:
    - out: object providing stat['Interactive'] and verbose(), used for user feedback
    - mainlog: log object providing verbose() and errorLog()
    - cmd_list: list of 'option=value' command strings. NB. May be appended to in place (group=, nsfin=, funcspec=)
    - tree: optional tree object. If None, an rje_tree.Tree is made from cmd_list
    Each stage runs in its own try/except: errors are reported through mainlog.errorLog() and the following stage is
    still attempted. SystemExit raised during stages <1> and <2> is passed on to the caller.
    '''
    try:    ### <0> ### Setup
        _seqfile = None
        _treefile = None
        append_file = None
        basefile = None
        for cmd in cmd_list:
            if cmd.find('seqin=') == 0:
                _seqfile = cmd[len('seqin='):]
                if _seqfile[-4:-3] == '.':  # Slice rather than index: names under 4 characters no longer raise IndexError
                    _seqfile = _seqfile[:-4]
            if cmd.find('useanc=') == 0:
                _seqfile = cmd[len('useanc='):]
                if _seqfile[-8:] == '.anc.fas':
                    _seqfile = _seqfile[:-8]
            if cmd.find('nsfin=') == 0:
                _treefile = cmd[len('nsfin='):]
            if cmd.find('append=') == 0:
                append_file = cmd[len('append='):]
            if cmd.find('basefile=') == 0:
                basefile = cmd[len('basefile='):]
        if _seqfile and os.path.exists('%s.grp' % _seqfile):
            cmd_list.append('group=%s.grp' % _seqfile)
        if _seqfile and _treefile is None:
            # No tree file given: look for one named after the sequence file (*.nwk preferred over *.nsf)
            if rje.checkForFile('%s.nwk' % _seqfile): _treefile = '%s.nwk' % _seqfile
            else: _treefile = '%s.nsf' % _seqfile
            out.verbose(0,2,'Looking for treefile %s.' % _treefile,1)
            if rje.checkForFile(_treefile):
                cmd_list.append('nsfin=%s' % _treefile)
                
        if tree is None:
            mainlog.verbose(0,1,'Tree: %s' % cmd_list,2)
            tree = rje_tree.Tree(log=mainlog,cmd_list=cmd_list)
            #tree._setupFromCmd()
        if tree.stat['MinFamNum'] < 2:
            tree.stat['MinFamNum'] = 2

            ### <1> ### Load Sequences and Tree
        while out.stat['Interactive'] > 0 or tree.obj['SeqList'] is None:
            tree = rje_tree.treeMenu(out,mainlog,['root=yes']+cmd_list,tree)
            if tree.obj['SeqList'] and tree.opt['Rooted']:
                break
            else:
                print('\n ** Must have loaded sequences and a rooted tree. ** \n')
                if out.stat['Interactive'] < 0 or rje.yesNo('Quit BADASP?',default='N'):
                    sys.exit()

        # Output file base: sequence list name without .fas/.anc extensions, unless basefile= was given
        basename = tree.obj['SeqList'].info['Name']
        if basename[-4:] == '.fas':
            basename = basename[:-4]
        if basename[-4:] == '.anc':
            basename = basename[:-4]
        if basefile:
            basename = basefile
                                
    except SystemExit:
        raise
    except:
        mainlog.errorLog('Major Error in badasp loading sequences and tree',True)

    try:    ### <2> ### Define Subfamilies
        while out.stat['Interactive'] > 0 or tree.groupNum() < 2:
            tree.treeGroup(callmenu=True)
            if tree.groupNum() >= 2:
                break
            else:
                mainlog.errorLog('Must have at least two subfamilies for specificity analyses.',printerror=False)
                if out.stat['Interactive'] < 0 or rje.yesNo('Continue without specificity analyses?'):
                    cmd_list.append('funcspec=')
                    break
                elif rje.yesNo('Abort BADASP?'):
                    sys.exit()
    except SystemExit:
        raise
    except:
        mainlog.errorLog('Major Error in BADASP subfamilies',True)

    try:    ### <3> ### GASP Ancestral Sequence Prediction
        if tree.node[-1].obj['Sequence'] is None:   # No ancseq loaded
            while out.stat['Interactive'] > 0 and rje.yesNo('Use %s for output filenames?' % basename) == False:
                basename = rje.choice('FILEname (FILE.anc.fas, FILE.anc.nsf, FILE.txt)?: ', default=basename)
            mygasp = rje_ancseq.Gasp(tree=tree,ancfile=basename,cmd_list=cmd_list,log=mainlog)
            out.verbose(0,2,'%s' % mygasp.details(),1)
            if out.stat['Interactive'] > 0:
                if rje.yesNo('Use these parameters?') == False:
                    mygasp.edit()
            mygasp.gasp()
    except:
        mainlog.errorLog('Major Error in BADASP GASP',True)
        
    try:    ### <4> ### Peform Functional Specificity and Sequence Conservation Calculations
        _stage = '<4> Specificity/Conservation Analyses'
        aaprop = rje_aaprop.AAPropMatrix(log=mainlog,cmd_list=cmd_list)
        query = tree.obj['SeqList'].obj['QuerySeq']
        ## <a> ## Chosen Methods
        _stage = '<4a> Specificity/Conservation Analyses - Chosen Methods'
        # NOTE(review): these defaults are the module-level lists themselves, so the remove() calls below alter
        # them for later callers - confirm whether that is intended.
        funcspec = rje_specificity.methodlist   # ['BAD','BADN','BADX']
        seqcon = rje_conseq.methodlist # ['info']
        for cmd in cmd_list:
            if cmd.find('funcspec=') == 0:
                funcspec = cmd[9:].split(',')
            if cmd.find('seqcon=') == 0:
                seqcon = cmd[len('seqcon='):].split(',')
        if 'all' in funcspec:
            funcspec = rje_specificity.methodlist
        if 'all' in seqcon:
            seqcon = rje_conseq.methodlist
        # Methods needing a query: either drop the method or pick a query sequence interactively
        # NOTE(review): the query is read from obj['QuerySeq'] above but stored under obj['Query'] below - the
        # keys look inconsistent; confirm against the SeqList class before changing.
        for method in ['BADX','BADN','QPCon_Mean','QPCon_Abs','QPCon_Mean_All']:
            while method in funcspec and query is None:
                if rje.yesNo('Method %s needs query but none given. Drop %s from specificity methods?' % (method,method)):
                    funcspec.remove(method)
                    break
                for seq in tree.obj['SeqList'].seq:
                    if rje.yesNo('Method %s needs query but none given. Use sequence 1 (%s)?' % (method,seq.shortName()),default='N'):
                        query = seq
                        tree.obj['SeqList'].obj['Query'] = seq
                        break
            while method in seqcon and query is None:
                if rje.yesNo('Method %s needs query but none given. Drop %s from conservation methods?' % (method,method)):
                    seqcon.remove(method)
                    break
                for seq in tree.obj['SeqList'].seq:
                    if rje.yesNo('Method %s needs query but none given. Use sequence 1 (%s)?' % (method,seq.shortName()),default='N'):
                        query = seq
                        tree.obj['SeqList'].obj['Query'] = seq
                        break
                
        qname = query
        if query:
            qname = query.info['Name']
        out.verbose(0,3,'\nQuery = %s' % qname,2)

        ## <b> ## Spec Calculations
        _stage = '<4b> Specificity Calculations'
        specmatrix = rje_specificity.FuncSpec(log=mainlog,cmd_list=cmd_list,tree=tree,aaprop=aaprop)
        specmatrix.calcScore(query=query,methods=funcspec)

        ## <c> ## Conservation Calculations
        _stage = '<4c> Specificity/Conservation Analyses - Conservation Calculations'
        conseq = rje_conseq.SeqStat(log=mainlog,cmd_list=cmd_list,tree=tree,aaprop=aaprop)
        conseq.calcScore(query=query,methods=seqcon)   ### Sends appropriate seqlist to self.calcScore()

        ## <d> ## Special Case: QPCon vs All seqs
        _stage = '<4d> Specificity/Conservation Analyses - QPCon vs All'
        qpconall = []
        #if 'QPCon_Abs_All' in seqcon and query:
        #    qpconall.append('QPCon_Abs')
        if 'QPCon_Mean_All' in seqcon and query:
            qpconall.append('QPCon_Mean')
        for qp in qpconall:
            # Keep the all-sequence results under *_All before the subfamily-only recalculation below
            conseq.score['%s_All' % qp] = conseq.score[qp]
            if qp in conseq.alnwin:
                conseq.alnwin['%s_All' % qp] = conseq.alnwin[qp]
            if qp in conseq.qrywin:
                conseq.qrywin['%s_All' % qp] = conseq.qrywin[qp]
            if qp in conseq.rank:
                conseq.rank['%s_All' % qp] = conseq.rank[qp]
            if qp in conseq.alnrankwin:
                conseq.alnrankwin['%s_All' % qp] = conseq.alnrankwin[qp]
            if qp in conseq.qryrankwin:
                conseq.qryrankwin['%s_All' % qp] = conseq.qryrankwin[qp]
        
        _stage = '<4d> Specificity/Conservation Analyses - FamQP'
        famqp = []
        if 'QPCon_Mean' in seqcon:
            famqp.append('QPCon_Mean')
        if 'QPCon_Abs' in seqcon:
            famqp.append('QPCon_Abs')
        if len(famqp) > 0 and query:    #!# And subfam option?
            # Recalculate against the sequences of the subfamily clade(s) containing the query
            qseq = []
            for fam in tree.subfam:
                for node in tree._nodeClade(fam):
                    if query == node.obj['Sequence']:
                        for qnode in tree._nodeClade(fam):
                            qseq.append(qnode.obj['Sequence'])
            conseq.calcScore(query=query,seqlist=qseq,methods=famqp)   ### Sends appropriate seqlist to self.calcScore()

    except:
        mainlog.errorLog('Major Error in BADASP Specificity Analysis (%s):' % _stage,True)
        
    BADASP = None   # Output file handle: None until opened, so the error handler can tell
    line = None     # Row under construction: None whenever there is nothing unwritten
    try:    ### <5> ### Full Output Results
        _stage = '<5> Full Output'
        # This output is in a tab- or comma-delimited file for easy manipulation or viewing with other programs.
        # (1) statistics for a given residue;
        # (2) statistics for a given window size across
        # - (a) the whole alignment,    (node=None)
        # - (b) the Query protein of interest (if given) and    (node=QueryNode)
        # - (c) the ancestral sequence of each subfamily;   (node=ancnode)
        # (3) Predicted ancestral sequences at
        # - (a) the root and
        # - (b) the ancestor of each subfamily.
        delimit = rje.getDelimit(cmd_list)

        ## <a> ## Setup        
        _stage = '<5a> Output - Setup'
        rankout = specmatrix.opt['Rank']
        #tree._regenerateSeqList(tree.obj['SeqList'],tree.node)
        root = tree.node[-1].obj['Sequence']      #!# At some point, make sure this is the most ancient duplication!
        out.verbose(0,3,'\nBADASP Results Output (%s.badasp) ...' % basename,0)

        ## <b> ## Header
        _stage = '<5b> Output - Header'
        _header = True
        if append_file:
            if rje.checkForFile(append_file):
               _header = False
            BADASP = open(append_file, 'a')
        else:
            BADASP = open('%s.badasp' % basename, 'w')
            BADASP.write("BADASP Output: %s\n" % (time.asctime(time.localtime(time.time()))))
            BADASP.write('%s\n\n' % cmd_list)        
        header = ['aln_pos','anc_aa'] # Aln Pos and AA
        alnlen = 0
        statlist = funcspec + seqcon
        _stage = '<5b-i> Output - Header Query'
        if query:
            header += ['qry_pos','qry_aa']  # Qry Pos and AA
        _stage = '<5b-ii> Output - Header Subfam'
        for f in range(len(tree.subfam)):
            header += ['fam%d_pos' % (f+1),'fam%d_aa' % (f+1)]   # Subfam Pos and AA
        for func in statlist:
            _stage = '<5b-iii> Output - Header %s' % func
            statobj = statObj(method=func,objlist=[specmatrix,conseq])
            fs = func.lower()
            alnlen = len(statobj.score[func])                
            header.append(fs)                   # Score
            if rankout:
                header.append('%s_rank' % fs)   # Rank
            if statobj.stat['WinSize'] > 1:
                header.append('%s_alnwin' % fs)     # Full align window
                if rankout:
                    header.append('%s_alnrankwin' % fs)   # Rank
                if query:
                    header.append('%s_qrywin' % fs) # Qry window
                    if rankout:
                        header.append('%s_qryrankwin' % fs)   # Rank
                if func in funcspec:
                    for f in range(len(tree.subfam)):
                        header.append('%s_fam%d_win' % (fs,f+1))  # Subfam windows
                        if rankout:
                            header.append('%s_fam%d_rankwin' % (fs,f+1))  # Subfam windows                    
        #if _header:
        BADASP.write('%s\n' % delimit.join(header))
        out.verbose(1,3,'%s...' % delimit.join(header),0)

        ## <c> ## Stats
        _stage = '<5c> Stats'
        qr = 0  # Qry pos
        fr = [0] * len(tree.subfam) # List of subfam positions
        aa = '' # Root aa
        qa = '' # Qry aa
        fa = [''] * len(tree.subfam) # List of subfam aas
        for r in range(alnlen):
            # <i> # Positions and aas
            _stage = '<5c-i> Output - Stats, positions & aas'
            aa = root.info['Sequence'][r]
            if query:
                qa = query.info['Sequence'][r]
                if qa != '-':
                    qr += 1
            for f in range(len(tree.subfam)):
                fa[f] = tree.subfam[f].obj['Sequence'].info['Sequence'][r]
                if fa[f] != '-':
                    fr[f] += 1

            # <ii> # Positions and AAs ii
            _stage = '<5c-ii> Output - Pos & AA ii'
            line = ['%d' % (r+1), aa]    # Aln Pos and AA
            if query:
                if qa == '-':               
                    line += ['-',qa]  # Qry Pos and AA
                else:               
                    line += ['%d' % qr,qa]  # Qry Pos and AA
            for f in range(len(tree.subfam)):
                if fa[f] == '-':
                    line += ['-',fa[f]]   # Subfam Pos and AA
                else:
                    line += ['%d' % fr[f],fa[f]]   # Subfam Pos and AA

            # <iii> # Stats                        
            _stage = '<5c-iii> Output - Stats'
            for func in statlist:
                statobj = statObj(method=func,objlist=[specmatrix,conseq])
                fs = func.lower()
                line.append(str(statobj.score[func][r]))   # Score
                if rankout:
                    line.append(str(statobj.rank[func][r]))   # Rank
                if statobj.stat['WinSize'] > 1:     # Same test as the header, so columns always line up
                    line.append(str(statobj.alnwin[func][r]))     # Full align window
                    if rankout:
                        line.append(str(statobj.alnrankwin[func][r]))   # Rank
                    if query:
                        line.append(str(statobj.qrywin[func][r])) # Qry window
                        if rankout:
                            line.append(str(statobj.qryrankwin[func][r]))   # Rank
                    if func in funcspec:
                        for f in range(len(tree.subfam)):
                            line.append(str(statobj.famwin[func][tree.subfam[f]][r]))  # Subfam windows
                            if rankout:
                                line.append(str(statobj.famrankwin[func][tree.subfam[f]][r]))   # Subfam windows                    
            # <iv> # Writing
            _stage = '<5c-iv> Output - Writing'
            BADASP.write('%s\n' % delimit.join(line))
            line = None     # Row is on disk: the error handler must not write it again
        BADASP.close()
        out.verbose(0,2,'Done!',2)

    except:
        mainlog.errorLog('Fatal Error in BADASP Full output (%s):' % _stage,True)
        # Salvage the partly built row (if any) and close the file - but only if it was actually opened and is
        # still open, so that the handler cannot itself fail with a NameError or a write to a closed file.
        if BADASP is not None and not BADASP.closed:
            if line is not None:
                BADASP.write('%s\n' % delimit.join(line))
            BADASP.close()
                        
    BADASP = None   # Reset so that the handler below never touches the handle or row of stage <5>
    line = None
    try:    ### <6> ### Partial Results Output 
        _stage = '<6> Partial Output'

        ## <a> ## Setup        
        _stage = '<6a> Output - Setup'
        # statlist & alnlen from above
        _part_append = False
        if out.stat['Interactive'] > 0 and rje.yesNo('Output additional, filtered results?',default='N'):
            partfile = rje.choice('Name for partial results file?:','%s.partial.badasp' % basename,confirm=True)
            if rje.checkForFile(partfile) and rje.yesNo('File %s exists. Append file without headers?' % partfile):
                _part_append = True
        else:
            return
        if rje.yesNo('Filter output columns?',default='N'):
            if rje.yesNo('Output query details (pos,aa & win)?') == False:
                query = None
            f = 1
            for fam in tree.subfam[0:]:     # Copies are looped over as the lists themselves are edited
                if rje.yesNo('Output subfam %d (%s) details (pos,aa & win)?' % (f,fam.info['CladeName'])) == False:
                    tree.subfam.remove(fam)
                f += 1
            for func in statlist[0:]:
                if rje.yesNo('Output %s results?' % func) == False:
                    statlist.remove(func)

        alnout = [True] * alnlen    # Whether each alignment column is output
        if rje.yesNo('Filter Rows by Results VALUES?'):
            out.verbose(0,0,'Initial Defaults are minmum values. Accept intital default for no filtering of given Stat.',1)
            for stat in statlist:
                ### Filter by value? ###
                statobj = statObj(method=stat,objlist=[specmatrix,conseq])
                scores = statobj.score[stat][0:]
                scores.sort()
                cutoff = rje.getFloat('Min. value for %s?:' % stat,default='%f' % scores[0],confirm=True)
                for r in range(alnlen):
                    if statobj.score[stat][r] < cutoff:
                        alnout[r] = False
        if rankout and rje.yesNo('Filter Rows by Results RANKS?'):
            out.verbose(0,0,'Ranks range from 0 (low) to 1 (high).',1)
            for stat in statlist:
                ### Filter by Rank? ###
                statobj = statObj(method=stat,objlist=[specmatrix,conseq])
                cutoff = rje.getFloat('Min. rank for %s?:' % stat,default='0.0',confirm=True)
                for r in range(alnlen):
                    if statobj.rank[stat][r] < cutoff:
                        alnout[r] = False
                
        out.verbose(0,3,'\nBADASP Partial Results Output (%s) ...' % partfile,0)

        ## <b> ## Header
        _stage = '<6b> Partial Output - Header'
        if _part_append:
            BADASP = open(partfile, 'a')
        else:
            BADASP = open(partfile, 'w')
            BADASP.write("Partial BADASP Output: %s\n" % (time.asctime(time.localtime(time.time()))))
            BADASP.write('%s\n\n' % cmd_list)        
        header = ['aln_pos','anc_aa'] # Aln Pos and AA
        _stage = '<6b-i> Partial Output - Header Query'
        if query:
            header += ['qry_pos','qry_aa']  # Qry Pos and AA
        _stage = '<6b-ii> Partial Output - Header Subfam'
        for f in range(len(tree.subfam)):
            header += ['fam%d_pos' % (f+1),'fam%d_aa' % (f+1)]   # Subfam Pos and AA
        for func in statlist:
            _stage = '<6b-iii> Partial Output - Header %s' % func
            statobj = statObj(method=func,objlist=[specmatrix,conseq])
            fs = func.lower()
            header.append(fs)                   # Score
            if rankout:
                header.append('%s_rank' % fs)   # Rank
            if statobj.stat['WinSize'] > 1:
                header.append('%s_alnwin' % fs)     # Full align window
                if rankout:
                    header.append('%s_alnrankwin' % fs)   # Rank
                if query:
                    header.append('%s_qrywin' % fs) # Qry window
                    if rankout:
                        header.append('%s_qryrankwin' % fs)   # Rank
                if func in funcspec:
                    for f in range(len(tree.subfam)):
                        header.append('%s_fam%d_win' % (fs,f+1))  # Subfam windows
                        if rankout:
                            header.append('%s_fam%d_rankwin' % (fs,f+1))  # Subfam windows                    
        #if not _part_append:
        BADASP.write('%s\n' % delimit.join(header))
        out.verbose(1,3,'%s...' % delimit.join(header),0)

        ## <c> ## Stats
        _stage = '<6c> Stats'
        qr = 0  # Qry pos
        fr = [0] * len(tree.subfam) # List of subfam positions
        aa = '' # Root aa
        qa = '' # Qry aa
        fa = [''] * len(tree.subfam) # List of subfam aas
        for r in range(alnlen):
            # <i> # Positions and aas
            _stage = '<6c-i> Partial Output - Stats, positions & aas'
            aa = root.info['Sequence'][r]
            if query:
                qa = query.info['Sequence'][r]
                if qa != '-':
                    qr += 1
            for f in range(len(tree.subfam)):
                fa[f] = tree.subfam[f].obj['Sequence'].info['Sequence'][r]
                if fa[f] != '-':
                    fr[f] += 1
            # Filtered rows are skipped only AFTER the position counters have advanced: skipping first made the
            # query/subfamily positions of every later row too small.
            if alnout[r] == False:
                continue

            # <ii> # Positions and AAs ii
            _stage = '<6c-ii> Partial Output - Pos & AA ii'
            line = ['%d' % (r+1), aa]    # Aln Pos and AA
            if query:
                if qa == '-':               
                    line += ['-',qa]  # Qry Pos and AA
                else:               
                    line += ['%d' % qr,qa]  # Qry Pos and AA
            for f in range(len(tree.subfam)):
                if fa[f] == '-':
                    line += ['-',fa[f]]   # Subfam Pos and AA
                else:
                    line += ['%d' % fr[f],fa[f]]   # Subfam Pos and AA

            # <iii> # Stats                        
            _stage = '<6c-iii> Partial Output - Stats'
            for func in statlist:
                statobj = statObj(method=func,objlist=[specmatrix,conseq])
                fs = func.lower()
                line.append(str(statobj.score[func][r]))   # Score
                if rankout:
                    line.append(str(statobj.rank[func][r]))   # Rank
                if statobj.stat['WinSize'] > 1:     # Same test as the header, so columns always line up
                    line.append(str(statobj.alnwin[func][r]))     # Full align window
                    if rankout:
                        line.append(str(statobj.alnrankwin[func][r]))   # Rank
                    if query:
                        line.append(str(statobj.qrywin[func][r])) # Qry window
                        if rankout:
                            line.append(str(statobj.qryrankwin[func][r]))   # Rank
                    if func in funcspec:
                        for f in range(len(tree.subfam)):
                            line.append(str(statobj.famwin[func][tree.subfam[f]][r]))  # Subfam windows
                            if rankout:
                                line.append(str(statobj.famrankwin[func][tree.subfam[f]][r]))   # Subfam windows
            # <iv> # Writing
            _stage = '<6c-iv> Partial Output - Writing'
            BADASP.write('%s\n' % delimit.join(line))
            line = None     # Row is on disk: the error handler must not write it again
        BADASP.close()
        out.verbose(0,2,'Done!',2)
        
    except:
        mainlog.errorLog('Fatal Error in BADASP Partial output (%s):' % _stage,True)
        # Same safe clean-up as for the full output
        if BADASP is not None and not BADASP.closed:
            if line is not None:
                BADASP.write('%s\n' % delimit.join(line))
            BADASP.close()