示例#1
0
 def _setAttributes(self):  ### Sets Attributes of Object
     '''
     Sets Attributes of Object.

     Declares the attribute name lists (only string attributes are named here), applies the generic defaults via
     _setDefaults() and stores a rje_db.Database and a rje_slimlist.SLiMList object in self.obj.
     '''
     ### ~ Attribute names ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     self.strlist = ['DomFile', 'DMIFile', 'OccFile', 'PPIFile']
     # No attributes of the other types are declared: each list attribute gets its own empty list
     for listname in ['boollist', 'intlist', 'numlist', 'filelist', 'listlist', 'dictlist', 'objlist']:
         setattr(self, listname, [])
     ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     defaults = {'str': 'None', 'bool': False, 'int': 0, 'num': 0.0, 'obj': None,
                 'setlist': True, 'setdict': True, 'setfile': True}
     self._setDefaults(**defaults)
     # No attribute-specific defaults: every typed setter is called with an empty dictionary
     for setter in [self.setStr, self.setBool, self.setInt, self.setNum]:
         setter({})
     ### ~ Helper objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     log, cmd_list = self.log, self.cmd_list
     self.obj['DB'] = rje_db.Database(log, cmd_list)
     self.obj['SLiMList'] = rje_slimlist.SLiMList(log, cmd_list)
示例#2
0
 def _setAttributes(self):  ### Sets Attributes of Object
     '''
     Sets Attributes of Object.

     Declares the attribute name lists (only string attributes are named here), applies the generic defaults via
     _setDefaults(), sets up forking attributes and stores a rje_db.Database and a rje_html.HTML object in self.obj.
     '''
     ### ~ Attribute names ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     self.strlist = ['Candidates', 'SeqIn']
     # No attributes of the other types are declared: each list attribute gets its own empty list
     for listname in ['boollist', 'intlist', 'numlist', 'filelist', 'listlist', 'dictlist', 'objlist']:
         setattr(self, listname, [])
     ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     defaults = {'str': 'None', 'bool': False, 'int': 0, 'num': 0.0, 'obj': None,
                 'setlist': True, 'setdict': True, 'setfile': True}
     self._setDefaults(**defaults)
     # No attribute-specific defaults: every typed setter is called with an empty dictionary
     for setter in [self.setStr, self.setBool, self.setInt, self.setNum]:
         setter({})
     ### ~ Forking and helper objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     self._setForkAttributes()  # Delete if no forking
     log, cmd_list = self.log, self.cmd_list
     self.obj['DB'] = rje_db.Database(log, cmd_list)
     self.obj['HTML'] = rje_html.HTML(log, cmd_list)
示例#3
0
 def _setAttributes(self):  ### Sets Attributes of Object
     '''
     Sets Attributes of Object.

     Declares the info/opt/stat/list/dict/obj attribute names, applies defaults (including default file names for
     Pillars, PPIFile and XRef), then creates the SeqList object, the SeqDict dictionary built from it, and the
     Database object.
     '''
     ### Attribute names ###
     self.infolist = ['Pillars', 'PPIFile', 'XRef']
     self.optlist = ['SGD2SP', 'Gopher']
     self.statlist = []
     self.listlist = ['Pillars', 'YeastSeq']
     self.dictlist = ['PPI', 'Rename']
     self.objlist = ['SeqList']
     ### Default values ###
     self._setDefaults(info='None', opt=False, stat=0.0, obj=None, setlist=True, setdict=True)
     default_files = {'Pillars': 'Pillars.tab',
                      'PPIFile': 'Y2H_union.txt',
                      'XRef': 'yeast_xref.20101222.tdt'}
     self.setInfo(default_files)
     ### Helper objects ###
     # Fixed sequence options come first, followed by the object's own command list
     seqcmd = ['accnr=F', 'seqnr=F', 'autoload=T', 'seqin=Proteins.fsa'] + self.cmd_list
     seqlist = rje_seq.SeqList(self.log, seqcmd)
     self.obj['SeqList'] = seqlist
     # Progress logging for the name dictionary is only requested when Verbose > 0
     show_progress = self.stat['Verbose'] > 0
     self.dict['SeqDict'] = seqlist.seqNameDic(proglog=show_progress)
     self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
示例#4
0
 def setup(self):    ### Main class setup method.
     '''
     Main class setup method.

     Creates the Database object, sets a default basefile from the MutPileup/WTPileup file names and QCut when no
     basefile is set, parses the WT pileup if *.WT.tdt is missing (or force is on), reads *.WT.tdt into per-locus
     reference sequences and allele lists, then parses the mutant pileup if *.Mut.tdt is missing (or force is on).

     Sets self.dict['RefSeq'] ({locus: sequence string}) and self.dict['WTMajor'] ({locus: list of alleles}).

     Returns True on success, None if *.fdr.tdt already exists and force is off (nothing is set up), or False if an
     error occurred (the error is logged via errorLog).
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['DB'] = rje_db.Database(self.log,self.cmd_list+['tuplekeys=T'])
         if self.baseFile().lower() in ['','none']:
             mutbase = rje.baseFile(self.getStr('MutPileup'),True)
             wtbase = rje.baseFile(self.getStr('WTPileup'),True)
             self.baseFile('%s.vs.%s.Q%d' % (mutbase,wtbase,self.getInt('QCut')))
         # Final *.fdr.tdt output already exists and no forced rerun: stop here (returns None)
         if not self.force() and os.path.exists('%s.fdr.tdt' % self.baseFile()): return
         ### ~ [2] Look for/process WT Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if self.force() or not os.path.exists('%s.WT.tdt' % self.baseFile()): self.parsePileup('WT',self.getStr('WTPileup'))
         ### ~ [3] Generate Reference sequences and Major Alleles (by locus) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         refseq = {}; rx = 0     # {locus: reference sequence string}; total reference length read so far
         majors = {}             # {locus: list with one allele per position}
         locus = None            # Locus of the previous data line
         wx = 0                  # Number of lines read (including the header)
         # The context manager guarantees the file is closed even if a line fails to parse
         with open('%s.WT.tdt' % self.baseFile(),'r') as WTDATA:
             for line in WTDATA:
                 self.progLog('\r#WT','Reading WT data: Reference seq length = %s nt' % (rje.iStr(rx)),rand=0.01)
                 data = rje.readDelimit(line); wx += 1
                 if data[0] == 'Locus': continue     # Header line
                 # A change of locus (re)starts that locus with empty data
                 if data[0] != locus: locus = data[0]; refseq[locus] = ''; majors[locus] = []
                 pos = int(data[1])
                 # Pad positions that are absent from the file so that list index = position - 1
                 while (pos - 1) > len(refseq[locus]): refseq[locus] += '?'; rx += 1
                 while (pos - 1) > len(majors[locus]): majors[locus].append('-')
                 refseq[locus] += data[2]; majors[locus].append(data[5]); rx += len(data[2])
         self.printLog('\r#WT','%s lines read from WT data: Reference seq length = %s nt' % (rje.iStr(wx),rje.iStr(rx)))
         # Both structures must cover the same number of positions for every locus
         for locus in rje.sortKeys(majors):
             if len(majors[locus]) != len(refseq[locus]): self.errorLog('%s WTMajor versus RefSeq length mismatch!' % locus,printerror=False); raise ValueError
         self.dict['WTMajor'] = majors
         self.dict['RefSeq'] = refseq
         ### ~ [4] Look for/process Mutant Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if self.force() or not os.path.exists('%s.Mut.tdt' % self.baseFile()): self.parsePileup('Mut',self.getStr('MutPileup'),True)
         return True     # Setup successful
     except: self.errorLog('Problem during %s setup.' % self); return False  # Setup failed
示例#5
0
 def setup(self):    ### Main class setup method.
     '''
     Main class setup method.

     Creates the Database object (with 'delimit=,' appended to the commands), calls splitMascot() and, when the iTRAQ
     option is on and self.dict['Samples'] is not empty, iTRAQSamples().

     Returns True on success, False if an error occurred (logged via errorLog).
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         dbcmd = self.cmd_list+['delimit=,']
         self.obj['DB'] = rje_db.Database(self.log,dbcmd)
         self.splitMascot()
         # Sample processing needs both the iTRAQ switch and some samples
         if self.getBool('iTRAQ') and self.dict['Samples']:
             self.iTRAQSamples()
         return True     # Setup successful
     except:
         self.errorLog('Problem during %s setup.' % self)
         return False    # Setup failed
示例#6
0
 def setup(self):    ### Main class setup method.
     '''
     Main class setup method.

     [1] Creates the Database object with an empty 'ProDigIS' table (keys AccNum, Protease) and a 'Source' table
         (loaded from the Source file if it exists, otherwise empty). Adds one 'ProDigIS' field per peptide length
         1..MaxPepLen and, if PepMWt is on, one float field i*100.0 for each i in the same range.
     [2] Loads every file in self.list['SeqFiles'] into the combined SeqList and records File and ProtMWt for each
         accession number in the 'Source' table.
     [3] If _peptideProbabilities() returns True, adds the expectation fields to the 'ProDigIS' table.

     Returns None when setup completes (bare return) and False if an error occurred (logged via errorLog).
     '''
     try:### ~ [1] Setup Database ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['DB'] = rje_db.Database(self.log,self.cmd_list)
         db = self.db().addEmptyTable('ProDigIS',['AccNum','Protease','PepCount'],['AccNum','Protease'])
         if self.getInt('MinPepLen') > 0: db.addField('MinPepLen')
         if self.getBool('NRPep'): db.addField('NRPep')
         if rje.exists(self.getStr('Source')):
             fdb = self.db().addTable(self.getStr('Source'),mainkeys=['AccNum'],name='Source')
             fdb.addField('File')
             fdb.addField('ProtMWt')
         else: fdb = self.db().addEmptyTable('Source',['AccNum','File','ProtMWt'],['AccNum'])
         for i in range(1,self.getInt('MaxPepLen')+1): db.addField(i)
         if self.getBool('PepMWt'):
             for i in range(1,self.getInt('MaxPepLen')+1): db.addField(i*100.0)
         ### ~ [2] Load Sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['SeqList'] = rje_seq.SeqList(self.log,self.cmd_list+['seqin=None','autoload=F'])
         self.obj['SeqList'].seq = fullseq = []  # fullseq is the same list object as the SeqList's seq attribute
         for seqfile in self.list['SeqFiles']:
             filebase = rje.baseFile(seqfile,True)   # Local renamed from 'file' to stop shadowing the builtin
             seqlist = rje_seq.SeqList(self.log,['autofilter=T','gnspacc=T','seqnr=F']+self.cmd_list+['seqin=%s' % seqfile,'autoload=T'])
             fullseq += seqlist.seqs()
             for seq in seqlist.seqs():
                 accnum = seq.getStr('AccNum')
                 try:
                     # Existing entry: report accession numbers already assigned to a file, then update
                     entry = fdb.data()[accnum]
                     if 'File' in entry and entry['File']: self.errorLog('%s found in %s AND %s!' % (accnum,entry['File'],filebase),printerror=False)
                     entry['File'] = filebase
                     entry['ProtMWt'] = seq.MWt()
                 except:
                     # Any failure above (e.g. accession number not in the table yet) adds a new entry
                     entry = {'AccNum':accnum,'File':filebase,'ProtMWt':seq.MWt()}
                     fdb.addEntry(entry)
                 self.deBug(fdb.dict['Data'][seq.getStr('AccNum')])
         self.printLog('#SEQ','%s sequences to analyse in total' % rje.iLen(fullseq))
         fdb.fillBlanks()
         ### ~ [3] Setup Peptide Probabilities ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if self._peptideProbabilities():
             # The second addField() argument names an existing field, which sets where the new field is placed
             db.addField('LenExp','PepCount')
             if self.getBool('PepMWt'): db.addField('MWtExp','LenExp'); db.addField('Len7Exp','MWtExp')
             else: db.addField('Len7Exp','LenExp')
             db.addField('Len37','Len7Exp')
             if self.getBool('PepMWt'):
                 db.addField('Len5','MWtExp'); db.addField('MWt5','Len5')
                 db.addField('Len3','MWtExp'); db.addField('MWt3','Len3')
             else: db.addField('Len5','LenExp'); db.addField('Len3','LenExp')
         return
         ### ~ [4] Temp GABLAM Data (DISABLED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         # NOTE(review): the statements below followed the return above and could never run. They are kept here as
         # comments so the disabled state is explicit; delete or reinstate them deliberately.
         #gdb = self.db().addTable('Chlam_Pos.vs.embl_bacteria.hitsum.tdt',['Qry'],name='GABLAM')
         #ndb = self.db().addTable('Chlam_Neg.vs.embl_bacteria.hitsum.tdt',['Qry'],name='GNeg')
         #self.db().mergeTables(gdb,ndb,overwrite=True,matchfields=True)
         #gdb.renameField('Qry','AccNum')
         #tmp = self.db().joinTables(name='blast',join=[('Source','AccNum'),('GABLAM','AccNum')],newkey=['AccNum','File'],keeptable=False)
         #tmp.saveToFile()
         #tmp.compress(['File'],default='mean')
         #tmp.dropFields(['AccNum'])
         #tmp.info['Name'] = 'blastsum'
         #tmp.saveToFile()
     except: self.errorLog('Problem during %s setup.' % self); return False  # Setup failed
示例#7
0
 def setup(self):  ### Main class setup method.
     '''
     Main class setup method.

     Creates the Database object (with 'tuplekeys=T' appended to the commands), falls back to the basefile
     'diploidocus' when none is set, and logs the output basename.

     Returns True on success, False if an error occurred (logged via errorLog).
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         dbcmd = self.cmd_list + ['tuplekeys=T']
         self.obj['DB'] = rje_db.Database(self.log, dbcmd)
         # baseFile(return_none='') is falsy when no basefile has been set
         current_base = self.baseFile(return_none='')
         if not current_base:
             self.baseFile('diploidocus')
         basetxt = 'Output file basename: %s' % self.baseFile()
         self.printLog('#BASE', basetxt)
         return True  # Setup successful
     except:
         self.errorLog('Problem during %s setup.' % self.prog())
         return False  # Setup failed
示例#8
0
 def batchSummarise(self):  ### Batch run seqlist summarise on batchrun=LIST files and output table of results
     '''
     Batch run seqlist summarise on batchrun=LIST files and output table of results.

     Each file in self.list['BatchRun'] is loaded into its own SeqList and summarised. The returned data is
     reformatted (GC dropped, GCPC/GapPC/MeanLength converted to fixed-precision strings) and added to the
     'summarise' table, which is loaded from file unless force is on (or no table could be loaded) and saved at
     the end.

     Returns True on success, False if an error occurred (logged via errorLog).
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         batchfiles = self.list['BatchRun']
         if not batchfiles:
             raise ValueError('Need to provide batchrun=LIST files for summarise mode.')
         db = rje_db.Database(self.log, self.cmd_list)
         self.printLog('#BASE', db.baseFile())
         sdb = None
         if not self.force():
             sdb = db.addTable(mainkeys=['File'], name='summarise', expect=False)
         if not sdb:
             sdb = db.addEmptyTable('summarise', ['File'], ['File'])
         # Preferred field order: these fields are added first, in this order, when present in the data
         headfields = 'SeqNum, TotLength, MinLength, MaxLength, MeanLength, MedLength, N50Length, L50Count, GapLength, GapPC, GCPC'.split(', ')
         ### ~ [2] Run Summarise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.printLog('#BATCH', 'Batch summarising %s input files' % rje.iLen(batchfiles))
         for seqfile in batchfiles:
             seqcmd = self.cmd_list + ['seqin=%s' % seqfile, 'autoload=T', 'summarise=F']
             seqdata = rje_seqlist.SeqList(self.log, seqcmd).summarise()
             if not seqdata:
                 self.errorLog('Summarise failed for %s' % seqfile, printerror=False)
                 continue
             ## ~ [2a] Reformat values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if 'GC' in seqdata:
                 seqdata.pop('GC')
                 seqdata['GCPC'] = '%.2f' % seqdata['GCPC']
             if 'GapLength' in seqdata:
                 gappc = 100.0 * seqdata['GapLength'] / seqdata['TotLength']
                 seqdata['GapPC'] = '%.2f' % gappc
             seqdata['MeanLength'] = '%.1f' % seqdata['MeanLength']
             ## ~ [2b] Make sure the table has every field, then add the entry ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             for field in headfields:
                 if field in seqdata and field not in sdb.fields():
                     sdb.addField(field)
             for field in seqdata.keys():
                 if field not in sdb.fields():
                     sdb.addField(field)
             sdb.addEntry(seqdata)
         ### ~ [3] Output Summarise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         sdb.saveToFile()
         return True
     except:
         self.errorLog('%s.batchSummarise error' % self)
         return False
示例#9
0
 def setup(self):  ### Main class setup method.
     '''
     Main class setup method.

     Creates the Database object and adds an empty 'TimePoints' table keyed on 'TimePoint Name'.

     Returns True on success, False if an error occurred (logged via errorLog).
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         db = rje_db.Database(self.log, self.cmd_list)
         self.obj['DB'] = db
         timefields = ['TimePoint Name', 'TimePoint Description', 'Source URL',
                       'Year', 'yearUnit', 'month', 'day']
         # Five numbered keyword fields: keyword1 .. keyword5
         timefields += ['keyword%d' % k for k in range(1, 6)]
         db.addEmptyTable('TimePoints', timefields, keys=['TimePoint Name'])
         return True  # Setup successful
     except:
         self.errorLog('Problem during %s setup.' % self)
         return False  # Setup failed
示例#10
0
 def setup(self):  ### Main class setup method.
     '''
     Main class setup method.

     Creates the Database object (with 'tuplekeys=T' appended to the commands). If no basefile is set and SeqIn is
     set, the basefile becomes the path-stripped base name of the SeqIn file. The output basename is then logged.

     Returns True on success, False if an error occurred (logged via errorLog). The success path previously fell
     off the end of the method and returned None, which a caller cannot tell apart from the falsy failure value.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['DB'] = rje_db.Database(self.log,
                                          self.cmd_list + ['tuplekeys=T'])
         # baseFile(return_none='') is falsy when no basefile has been set
         if not self.baseFile(return_none='') and self.getStrLC('SeqIn'):
             self.baseFile(
                 rje.baseFile(self.getStr('SeqIn'), strip_path=True))
         self.printLog('#BASE',
                       'Output file basename: %s' % self.baseFile())
         return True  # Setup successful
     except:
         self.errorLog('Problem during %s setup.' % self.prog())
         return False  # Setup failed
示例#11
0
 def setup(self):  ### Main class setup method.
     '''
     Main class setup method.

     Loads each file in self.list['iTunes'] into a database table keyed on Location and tidies its entries:
     fields flagged 'del' in iformat are dropped, text fields are stripped of unexpected characters, Album_Artist is
     taken from the Location path where possible, empty Plays/Skips become 0 and (with AddScore) a Score is derived
     from 'My Rating'.

     NOTE(review): iformat is not defined in this method - presumably a module-level {field: format} mapping;
     verify that it is in scope.

     Returns True on success, False if an error occurred (logged via errorLog).
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         fixfields = ['Location', 'Name', 'Artist', 'Composer', 'Album']
         keepchars = '\\/: -_()[].~$'  # Non-alphanumeric characters that are kept in the fixed fields
         db = self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
         for ifile in self.list['iTunes']:
             ## ~ [1a] Load and reformat table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             idb = db.addTable(ifile, mainkeys=['Location'], name=rje.baseFile(ifile, True))
             for field in iformat:
                 if iformat[field] == 'del' and field in idb.fields():
                     idb.dropField(field)
             idb.dataFormat(iformat)
             idb.addField('Album_Artist', 'Album')
             idb.addField('Tracks', evalue=1)
             if self.getBool('AddScore'):
                 idb.addField('Score', evalue=0)
             ## ~ [1b] Tidy entries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             for entry in idb.entries():
                 # Keep only alphanumeric characters and those listed in keepchars
                 for field in fixfields:
                     entry[field] = ''.join([x for x in entry[field] if x.isalnum() or x in keepchars])
                 # Default to the Artist; replaced by the third-from-last element of the Location path
                 # for the first divider that splits the path into more than two elements
                 entry['Album_Artist'] = entry['Artist']
                 try:
                     for divider in ['\\\\', '\\', ':', '/']:
                         pathparts = entry['Location'].split(divider)
                         if len(pathparts) > 2:
                             entry['Album_Artist'] = pathparts[-3]
                             break
                 except:
                     self.errorLog('!')
                     self.deBug(entry['Location'])
                 for counter in ['Plays', 'Skips']:
                     if not entry[counter]:
                         entry[counter] = 0
                 if self.getBool('AddScore') and entry['My Rating']:
                     entry['Score'] = (entry['My Rating'] - 60) / 20.0
             # Location values may have changed above, so the table keys are rebuilt
             idb.remakeKeys()
         return True  # Setup successful
     except:
         self.errorLog('Problem during %s setup.' % self)
         return False  # Setup failed
示例#12
0
 def setup(self):  ### Main class setup method.
     '''
     Main class setup method.

     Creates the Database object, loads the GeneMap file into table 'GeneMap' (key Gene) and the Pairwise file
     into table 'PPI' (keys Hub, Spoke), then calls loadHHPID().

     Returns True on success, False if an error occurred (logged via errorLog).
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
         # (string attribute holding the file name, key fields, table name)
         tables = [('GeneMap', ['Gene'], 'GeneMap'),
                   ('Pairwise', ['Hub', 'Spoke'], 'PPI')]
         for strkey, keyfields, tablename in tables:
             self.db().addTable(self.getStr(strkey), mainkeys=keyfields, datakeys='All', name=tablename)
         self.loadHHPID()
         return True  # Setup successful
     except:
         self.errorLog('Problem during %s setup.' % self)
         return False  # Setup failed
示例#13
0
 def _setAttributes(self):  ### Sets Attributes of Object
     '''
     Sets Attributes of Object.

     Declares the attribute name lists, applies defaults, creates the Database object (default basefile 'taxmap',
     which the object's own commands can override as they come later in the list), copies the Database basefile to
     this object, fills the Classify and NwkList lists from *.class and *.nwk files in the current directory, and
     sets up forking attributes.
     '''
     ### ~ Attribute names ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     self.strlist = ['ProtDesc', 'TaxBase']
     self.boollist = ['BootWeight', 'Monophyly']
     self.intlist = []
     self.numlist = ['BootFilter', 'MinBoot', 'MinScore', 'MinClass', 'MinSum', 'NoneBoot']
     self.filelist = []
     self.listlist = ['Classify', 'NwkList', 'TaxFilter']
     self.dictlist = []
     self.objlist = []
     ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     defaults = {'str': 'None', 'bool': False, 'int': 0, 'num': 0.0, 'obj': None,
                 'setlist': True, 'setdict': True, 'setfile': True}
     self._setDefaults(**defaults)
     self.setStr({})
     self.setBool({'BootWeight': True, 'Monophyly': False})
     self.setInt({})
     numdefaults = {'BootFilter': 0.0, 'MinBoot': 0.5, 'MinClass': 1.0,
                    'MinScore': 1.0, 'MinSum': 10.0, 'NoneBoot': 1.0}
     self.setNum(numdefaults)
     dbcmd = ['tuplekeys=T', 'basefile=taxmap'] + self.cmd_list
     self.obj['DB'] = rje_db.Database(self.log, dbcmd)
     self.baseFile(self.obj['DB'].baseFile())
     # Default file lists: every matching file found by glob
     for listkey, pattern in [('Classify', '*.class'), ('NwkList', '*.nwk')]:
         self.list[listkey] = glob.glob(pattern)
     ### ~ Other Attributes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     self._setForkAttributes()  # Delete if no forking
示例#14
0
 def _setAttributes(self):   ### Sets Attributes of Object
     '''
     Sets Attributes of Object.

     No named attributes are declared. Applies the generic defaults via _setDefaults(), sets up forking attributes
     and stores a rje_db.Database and a rje_seq.SeqList (created with 'autoload=T' and 'dna=T' ahead of the object's
     own commands) in self.obj.
     '''
     ### ~ Attribute names ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     # Every attribute name list is empty: each one gets its own list object
     for listname in ['strlist','boollist','intlist','numlist','listlist','dictlist','objlist']:
         setattr(self,listname,[])
     ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     defaults = {'str':'None','bool':False,'int':0,'num':0.0,'obj':None,'setlist':True,'setdict':True}
     self._setDefaults(**defaults)
     ### ~ Forking and helper objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     self._setForkAttributes()   # Delete if no forking
     self.obj['DB'] = rje_db.Database(self.log,self.cmd_list)
     seqcmd = ['autoload=T','dna=T']+self.cmd_list
     self.obj['SeqList'] = rje_seq.SeqList(self.log,seqcmd)
示例#15
0
 def _setAttributes(self):   ### Sets Attributes of Object
     '''
     Sets Attributes of Object.

     Declares the string attribute ExOpt and the boolean attributes MemSaver, Cleanup and GZip (defaults False,
     False and True), applies the generic defaults, sets up forking attributes and stores a rje_db.Database (with
     'tuplekeys=T' appended to the commands) in self.obj.
     '''
     ### ~ Attribute names ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     self.strlist = ['ExOpt']
     self.boollist = ['MemSaver','Cleanup','GZip']
     # No attributes of the other types are declared: each list attribute gets its own empty list
     for listname in ['intlist','numlist','filelist','listlist','dictlist','objlist']:
         setattr(self,listname,[])
     ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     defaults = {'str':'None','bool':False,'int':0,'num':0.0,'obj':None,
                 'setlist':True,'setdict':True,'setfile':True}
     self._setDefaults(**defaults)
     self.setStr({})
     booldefaults = {'MemSaver':False,'Cleanup':False,'GZip':True}
     self.setBool(booldefaults)
     self.setInt({})
     self.setNum({})
     ### ~ Forking and helper objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     self._setForkAttributes()   # Delete if no forking
     dbcmd = self.cmd_list+['tuplekeys=T']
     self.obj['DB'] = rje_db.Database(self.log,dbcmd)
示例#16
0
 def setup(self):  ### Main class setup method.
     '''
     Main class setup method. This will load sequences into a SeqList object, gaps into a 'gaps' database table, and
     check or generate a PAF file from the mapped long reads.

     Returns True on success, False if an error occurred (logged via errorLog). A missing or unreadable seqin file
     and a PAF file that cannot be read or created are both reported this way.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
         ## ~ [1a] Check input sequence file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if not self.getStrLC('SeqIn'):
             raise ValueError('seqin=FILE must be set')
         seqfile = self.getStr('SeqIn')
         if not rje.exists(seqfile):
             raise IOError('Unable to read seqin=FILE: "{0}"'.format(seqfile))
         seqbase = rje.baseFile(seqfile, strip_path=True)
         if not self.getStrLC('Basefile'):
             self.baseFile(seqbase)
         ## ~ [1b] Load sequences and gaps ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         # An existing gaps table is reused unless force is on: gapstats is switched off in that case only
         reuse_gaps = rje.checkForFiles(filelist=['.gaps.tdt'], basename=seqbase, log=self.log) and not self.force()
         self.cmd_list.append('gapstats=F' if reuse_gaps else 'gapstats=T')
         self.seqinObj()  # Called for its effects only: the returned object is not used in this method
         gapdb = self.db().addTable('%s.gaps.tdt' % seqbase,
                                    mainkeys=['seqname', 'start', 'end'],
                                    name='gaps',
                                    ignore=[],
                                    expect=True)
         gapdb.dataFormat({'start': 'int', 'end': 'int'})
         ### ~ [2] PAF File ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not self.getStrLC('PAF'):
             self.setStr({'PAF': self.baseFile() + '.paf'})
         pfile = self.getStr('PAF')
         if self.force() or not rje.exists(pfile):
             rje_paf.PAF(self.log, self.cmd_list).longreadMinimapPAF(pfile)
         if not rje.exists(self.getStr('PAF')):
             raise IOError('Unable to read or create PAF file: {0}'.format(pfile))
         return True
     except:
         self.errorLog('Problem during %s setup.' % self.prog())
         return False  # Setup failed
示例#17
0
 def _setAttributes(self):  ### Sets Attributes of Object
     '''
     Sets Attributes of Object.

     Declares the string attributes and the TDTKeys list, applies defaults (field names, GFF3 reformatting and
     TDTKeys of Qry, Hit, AlnNum), stores a rje_db.Database in self.obj and sets up forking attributes.
     '''
     ### ~ Attribute names ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     self.strlist = ['BegField', 'EndField', 'QueryField', 'Reformat', 'TargetField', 'TDTFile']
     self.listlist = ['TDTKeys']
     # No attributes of the other types are declared: each list attribute gets its own empty list
     for listname in ['boollist', 'intlist', 'numlist', 'filelist', 'dictlist', 'objlist']:
         setattr(self, listname, [])
     ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     defaults = {'str': 'None', 'bool': False, 'int': 0, 'num': 0.0, 'obj': None,
                 'setlist': True, 'setdict': True, 'setfile': True}
     self._setDefaults(**defaults)
     # NOTE(review): EndField defaults to 'HitStart' while BegField is 'QryStart' - confirm this is intended
     strdefaults = {'BegField': 'QryStart',
                    'EndField': 'HitStart',
                    'QueryField': 'Qry',
                    'Reformat': 'GFF3',
                    'TargetField': 'Hit'}
     self.setStr(strdefaults)
     for setter in [self.setBool, self.setInt, self.setNum]:
         setter({})
     self.list['TDTKeys'] = ['Qry', 'Hit', 'AlnNum']
     ### ~ Helper objects and forking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
     self._setForkAttributes()  # Delete if no forking
示例#18
0
 def setup(self):  ### Main class setup method.
     '''
     Main class setup method.

     Creates the Database object, sets the basefile to 'ingolia' and loads the nine supplementary tables
     (ingolia.s1a.csv ... ingolia.s4.csv). Tables are named from the names dictionary where the table ID has an
     entry and by the table ID otherwise; 'pauses' and 'starts' are loaded with explicit key fields.

     Returns True on success, False if an error occurred (logged via errorLog).
     '''
     try:  ### ~ [1] Load tables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
         self.baseFile('ingolia')
         tables = ['s1a', 's1b', 's1c', 's1d', 's2a', 's2b', 's2c', 's3', 's4']
         # Table ID -> table name. Not sure what Table S1 data is! (s1b and s1d keep their ID as name)
         names = {'s1a': 'synthesis', 's1c': 'efficiency',
                  's2a': 'genes', 's2b': 'pauses', 's2c': 'stops',
                  's3': 'starts', 's4': 'lincRNA'}
         # Table name -> key fields for the tables that are loaded with explicit keys
         keyfields = {'pauses': ['UCSC ID', 'Codon'],
                      'starts': ['knownGene', 'Init Codon [nt]']}
         for tab in tables:
             name = names.get(tab, tab)
             dfile = '%s.%s.csv' % (self.baseFile(), tab)
             if name in keyfields:
                 self.db().addTable(dfile, mainkeys=keyfields[name], name=name)
             else:
                 self.db().addTable(dfile, name=name)
         return True  # Setup successful
     except:
         self.errorLog('Problem during %s setup.' % self)
         return False  # Setup failed
示例#19
0
 def setup(self):  ### Main class setup method.
     '''
     Main class setup method.

     [1]  Creates the Database object, gives it this object's basefile and sets self.list['Accuracy'] to
          [0, 1 - ErrPerBase].
     [1a] Makes sure SMRTUnits is one of reads/gb/mb (case-insensitive). An unrecognised value is replaced by a
          suggestion based on SMRTReads (Gb if < 10, reads if > 10000, otherwise Mb), either automatically
          (self.i() < 0) or after asking the user. SMRTReads given in Gb or Mb is then converted into a number of
          reads using AvRead.
     [1b] Converts self.list['XnList'] into a sorted list of integers. Empty strings, 0 and 1 are dropped and
          values that cannot be converted are reported and skipped.

     Returns True on success, False if an error occurred (logged via errorLog).
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
         self.db().basefile(self.basefile())
         self.list['Accuracy'] = [0, 1.0 - self.getNum('ErrPerBase')]
         ## ~ [1a] SMRTReads ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         while self.getStrLC('SMRTUnits') not in ['reads', 'gb', 'mb']:
             # The rejected value is now inserted into the message (it used to show a literal "%s")
             txt = 'SMRTUnits "%s" not recognised' % self.getStr('SMRTUnits')
             if self.getNum('SMRTReads') < 10: smrtunits = 'Gb'
             elif self.getNum('SMRTReads') > 10000: smrtunits = 'reads'
             else: smrtunits = 'Mb'
             # Non-interactive runs (i < 0) take the suggestion; otherwise the user is asked first
             if self.i() < 0 or rje.yesNo(
                     '%s: switch to (%s) %s?' %
                 (txt, self.getNum('SMRTReads'), smrtunits)):
                 self.setStr({'SMRTUnits': smrtunits})
             elif self.i() > 0:
                 self.setStr(
                     {'SMRTUnits': rje.choice('SMRTUnits (reads/Gb/Mb)?')})
             self.printLog('#UNITS',
                           '%s => %s' % (txt, self.getStr('SMRTUnits')))
         if self.getStrLC('SMRTUnits') in ['gb', 'mb']:
             # Convert the Gb/Mb total into bases, then into a number of reads of average length AvRead
             basesperunit = {'gb': 1e9, 'mb': 1e6}[self.getStrLC('SMRTUnits')]
             smrttotal = self.getNum('SMRTReads') * basesperunit
             txt = '%s %s @ %.3f kb/read' % (self.getNum('SMRTReads'),
                                             self.getStr('SMRTUnits'),
                                             self.getNum('AvRead') / 1000.0)
             self.setNum({'SMRTReads': smrttotal / self.getNum('AvRead')})
             txt += ' => %s reads' % rje.iStr(int(self.getNum('SMRTReads')))
             self.printLog('#READS', txt)
         ## ~ [1b] XnList ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         xnlist = []
         for xn in self.list['XnList']:
             if xn == '': continue
             try:
                 ixn = int(xn)
                 # Report any value that was changed by the conversion to an integer
                 if xn not in [ixn, '%d' % ixn]:
                     self.printLog('#XN', '"%s" -> %dX' % (xn, ixn))
                 if ixn == 0:
                     self.printLog(
                         '#XN', 'No point in 0X output: use 1-%Coverage.')
                 elif ixn == 1:
                     self.printLog('#XN',
                                   'No point in 1X output: use %Coverage.')
                 else:
                     xnlist.append(ixn)
             except:
                 self.errorLog(
                     'Could not process %s as part of XnList. (Integers only.)'
                     % xn)
         xnlist.sort()
         if xnlist:
             # Joined directly from the integers: splitting the list's repr left a double space after each comma
             self.printLog(
                 '#XN', 'XnList: %sX.' %
                 'X, '.join(['%d' % ixn for ixn in xnlist]))
         self.list['XnList'] = xnlist
         return True  # Setup successful
     except:
         self.errorLog('Problem during %s setup.' % self.prog())
         return False  # Setup failed
示例#20
0
 def run(self):  ### Main run method
     '''
     Main run method. Loads the tab-delimited InFile as a database table, standardises the Level_1 to Level_7
     taxonomy fields of every entry, saves the modified table, and then repeatedly compresses the table and saves
     one output file per taxonomic level (Level_7 first, Level_1 last).
     << Returns the result of self.printLog() if a blank file name is given at the prompt; otherwise None.
     Any exception is reported through self.errorLog() and then re-raised.
     '''
     try:  ### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         infile = self.getStr('InFile')
         # Keep prompting until an existing file is named; a blank answer quits
         while not rje.exists(infile):
             infile = rje.choice(
                 'File "%s" not found. Input file name? (Blank to quit):' %
                 infile)
             if not infile:
                 return self.printLog('#QUIT', 'Execution terminated!')
         db = rje_db.Database(self.log, self.cmd_list)
         db.basefile(rje.baseFile(infile))
         sdb = db.addTable(infile,
                           mainkeys='#',
                           delimit='\t',
                           name='SPF.Mod')
         # Taxonomy fields in processing order (defined once; reused for the compression stage below)
         lvlfields = [
             'Level_1', 'Level_2', 'Level_3', 'Level_4', 'Level_5',
             'Level_6', 'Level_7'
         ]
         # One-letter code for each level field; used to build the 'x__' name prefix and output file names
         levels = {
             'Level_1': 'k',
             'Level_2': 'p',
             'Level_3': 'c',
             'Level_4': 'o',
             'Level_5': 'f',
             'Level_6': 'g',
             'Level_7': 's'
         }
         # Example input rows:
         # k__Bacteria	p__Proteobacteria	c__Alphaproteobacteria	o__Rhodospirillales	f__Rhodospirillaceae	g__	s__	denovo44
         # Unassigned	unclassified	unclassified	unclassified	unclassified	unclassified	unclassified	denovo49
         ### ~ [2] Modify Text ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         dupnames = []  # (name, previous level name) pairs that match after the first 3 characters
         parents = {}  # {name: {parent name: name to use for that name under that parent}}
         renamed = []  # Original names that were given a numbered replacement (one element per renaming)
         ex = 0.0
         etot = sdb.entryNum()
         for entry in sdb.entries():
             self.progLog('\r#SPF',
                          'Modifying SPF content: %.1f%%' % (ex / etot))
             ex += 100.0
             taxon = ''  # Final name given to the previous level of this entry
             parent = ''  # Parent name for the current level ('' at the first level)
             for lvl in lvlfields:
                 # str methods replace the string-module functions, which no longer exist in Python 3
                 entry[lvl] = entry[lvl].replace('unidentified',
                                                 'unclassified')
                 null = '%s__' % levels[lvl]
                 # Empty/uninformative names are replaced by a name derived from the previous level
                 if entry[lvl] in [
                         null, 'Unassigned', 'unclassified',
                         '%sunclassified' % null,
                         '%sunidentified' % null,
                         '%sunculturedfungus' % null,
                         '%sIncertae_sedis' % null,
                         '%sunclassified_sp.' % null
                 ]:
                     if not taxon or taxon.endswith('unclassified'):
                         entry[lvl] = '%sunclassified' % null
                     elif taxon.endswith('unassigned)'):
                         # Previous level was itself unassigned: extend its bracketed list with this level
                         entry[lvl] = '%s%s;%s-unassigned)' % (
                             null, taxon[3:-1], levels[lvl])
                     else:
                         entry[lvl] = '%s%s(%s-unassigned)' % (
                             null, taxon[3:], levels[lvl])
                 if entry[lvl] in parents:
                     if parent in parents[entry[lvl]]:
                         # Name already seen under this parent: reuse the name recorded for it
                         entry[lvl] = parents[entry[lvl]][parent]
                     else:
                         # Name already seen under a different parent: give it a numbered replacement
                         self.bugPrint(entry[lvl])
                         self.bugPrint(parents[entry[lvl]])
                         renamed.append(entry[lvl])
                         newtax = '%s%d' % (entry[lvl],
                                            renamed.count(entry[lvl]))
                         self.warnLog(
                             '%s had multiple parents (%s & %s) -> %s' %
                             (entry[lvl], '|'.join(parents[entry[lvl]]),
                              parent, newtax))
                         parents[newtax] = {parent: newtax}
                         parents[entry[lvl]][parent] = newtax
                         entry[lvl] = newtax
                         self.deBug(parents[entry[lvl]])
                 elif parent:
                     parents[entry[lvl]] = {parent: entry[lvl]}
                 parent = entry[lvl]
                 # Record levels whose name (minus the 3-character prefix) repeats the previous level's name
                 if entry[lvl][3:] == taxon[3:]:
                     if (entry[lvl], taxon) not in dupnames:
                         dupnames.append((entry[lvl], taxon))
                 taxon = entry[lvl]
         self.printLog('\r#SPF', 'Modifying SPF content complete.')
         dupnames.sort()
         for (dupA, dupB) in dupnames:
             self.warnLog('Duplicate taxa names: %s & %s' % (dupA, dupB))
         ### ~ [3] Save to file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         sdb.saveToFile(savefields=sdb.list['Fields'][1:])
         ### ~ [4] Compress to different taxonomic levels ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         compress = lvlfields[:]  # Fresh list: it is passed to sdb.compress() and shortened after each round
         dump = '#'  # Field dropped after the next compression
         rules = {'Observation Ids': 'list', dump: 'str'}
         sdb.dropField('Observation Ids')
         while compress:
             sdb.compress(compress,
                          rules=rules,
                          default='sum',
                          best=[],
                          joinchar='|')
             sdb.dropField(dump)
             # Output file is named after the last (deepest) level still being compressed on
             sdb.saveToFile(
                 '%s.SPF.%s.%s.spf' %
                 (rje.baseFile(infile), compress[-1], levels[compress[-1]]))
             dump = compress.pop(-1)
             rules[dump] = 'list'
         return
     except:
         self.errorLog(self.zen())
         raise  # Delete this if method error not terrible