def _setAttributes(self):  ### Sets Attributes of Object
    '''
    Declare the attribute names handled by this object and initialise them.

    Four string attributes are registered (DomFile, DMIFile, OccFile, PPIFile); every other
    attribute-name list is empty. Defaults are then applied and a Database object plus a SLiMList
    object are created from self.log and self.cmd_list and stored in self.obj.
    '''
    ### ~ Attribute name lists ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self.strlist = ['DomFile', 'DMIFile', 'OccFile', 'PPIFile']
    # Each remaining list gets its own fresh empty list (they must not share one object).
    for listname in ('boollist', 'intlist', 'numlist', 'filelist', 'listlist', 'dictlist', 'objlist'):
        setattr(self, listname, [])
    ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    defaults = dict(str='None', bool=False, int=0, num=0.0, obj=None,
                    setlist=True, setdict=True, setfile=True)
    self._setDefaults(**defaults)
    self.setStr({})
    self.setBool({})
    self.setInt({})
    self.setNum({})
    ### ~ Helper objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    database = rje_db.Database(self.log, self.cmd_list)
    self.obj['DB'] = database
    slimlist = rje_slimlist.SLiMList(self.log, self.cmd_list)
    self.obj['SLiMList'] = slimlist
def _setAttributes(self):  ### Sets Attributes of Object
    '''
    Declare the attribute names handled by this object and initialise them.

    Registers the string attributes Candidates and SeqIn, applies the standard defaults, sets up the
    forking attributes, and stores a Database object and an HTML object (both built from self.log and
    self.cmd_list) in self.obj.
    '''
    ### ~ Attribute name lists ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self.strlist = ['Candidates', 'SeqIn']
    self.boollist = []; self.intlist = []; self.numlist = []
    self.filelist = []; self.listlist = []; self.dictlist = []; self.objlist = []
    ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self._setDefaults(
        str='None',
        bool=False,
        int=0,
        num=0.0,
        obj=None,
        setlist=True,
        setdict=True,
        setfile=True)
    self.setStr({})
    self.setBool({})
    self.setInt({})
    self.setNum({})
    ### ~ Forking and helper objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self._setForkAttributes()  # Delete if no forking
    for objkey, objclass in (('DB', rje_db.Database), ('HTML', rje_html.HTML)):
        self.obj[objkey] = objclass(self.log, self.cmd_list)
def _setAttributes(self):  ### Sets Attributes of Object
    '''
    Declare the attribute names handled by this object and initialise them.

    Uses the info/opt/stat attribute naming. After defaults are applied, a SeqList is created with
    autoload=T and seqin=Proteins.fsa placed ahead of self.cmd_list, its seqNameDic() result is stored
    in self.dict['SeqDict'], and a Database object is stored in self.obj['DB'].
    '''
    ### Attribute name lists ###
    self.infolist = ['Pillars', 'PPIFile', 'XRef']
    self.optlist = ['SGD2SP', 'Gopher']
    self.statlist = []
    self.listlist = ['Pillars', 'YeastSeq']
    self.dictlist = ['PPI', 'Rename']
    self.objlist = ['SeqList']
    ### Default values ###
    self._setDefaults(info='None', opt=False, stat=0.0, obj=None, setlist=True, setdict=True)
    default_files = {
        'Pillars': 'Pillars.tab',
        'PPIFile': 'Y2H_union.txt',
        'XRef': 'yeast_xref.20101222.tdt',
    }
    self.setInfo(default_files)
    ### Helper objects ###
    # The fixed options come first and self.cmd_list is appended after them.
    seqcmd = ['accnr=F', 'seqnr=F', 'autoload=T', 'seqin=Proteins.fsa'] + self.cmd_list
    self.obj['SeqList'] = rje_seq.SeqList(self.log, seqcmd)
    seqlist = self.obj['SeqList']
    # Progress logging for the name dictionary is only requested when Verbose is above zero.
    self.dict['SeqDict'] = seqlist.seqNameDic(proglog=self.stat['Verbose'] > 0)
    self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
def setup(self):  ### Main class setup method.
    '''
    Main class setup method.

    Creates the Database object, derives a default basefile from the MutPileup/WTPileup names and QCut
    when none is set, and then:
      - calls self.parsePileup('WT', ...) when forced or when <basefile>.WT.tdt is missing;
      - reads <basefile>.WT.tdt to build, per locus, a reference sequence string and a list of major
        alleles, stored in self.dict['RefSeq'] and self.dict['WTMajor'];
      - calls self.parsePileup('Mut', ..., True) when forced or when <basefile>.Mut.tdt is missing.

    Returns True on success and False if any error was caught and logged. Returns None (bare return)
    when <basefile>.fdr.tdt already exists and force is off.
    '''
    try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.obj['DB'] = rje_db.Database(self.log,self.cmd_list+['tuplekeys=T'])
        if self.baseFile().lower() in ['','none']:
            self.baseFile('%s.vs.%s.Q%d' % (rje.baseFile(self.getStr('MutPileup'),True),rje.baseFile(self.getStr('WTPileup'),True),self.getInt('QCut')))
        # Final output already present and not forcing: nothing to set up.
        if not self.force() and os.path.exists('%s.fdr.tdt' % self.baseFile()): return
        ### ~ [2] Look for/process WT Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        if self.force() or not os.path.exists('%s.WT.tdt' % self.baseFile()): self.parsePileup('WT',self.getStr('WTPileup'))
        ### ~ [3] Generate Reference sequences and Major Alleles (by locus) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        refseq = {}; rx = 0     # Locus -> reference sequence string; running total of reference characters
        majors = {}             # Locus -> list of values taken from column 6 of the WT table
        locus = None
        wx = 0                  # Number of lines read (header included)
        WTDATA = open('%s.WT.tdt' % self.baseFile(),'r')
        try:    # Close the file handle even if a malformed line raises part-way through
            for line in WTDATA:
                self.progLog('\r#WT','Reading WT data: Reference seq length = %s nt' % (rje.iStr(rx)),rand=0.01)
                data = rje.readDelimit(line); wx += 1
                if data[0] == 'Locus': continue     # Header row
                if data[0] != locus: locus = data[0]; refseq[locus] = ''; majors[locus] = []
                pos = int(data[1])
                # Positions are 1-based: pad any positions missing from the table so index pos-1 lines up.
                while (pos - 1) > len(refseq[locus]): refseq[locus] += '?'; rx += 1
                while (pos - 1) > len(majors[locus]): majors[locus].append('-')
                refseq[locus] += data[2]; majors[locus].append(data[5]); rx += len(data[2])
        finally: WTDATA.close()
        self.printLog('\r#WT','%s lines read from WT data: Reference seq length = %s nt' % (rje.iStr(wx),rje.iStr(rx)))
        for locus in rje.sortKeys(majors):
            if len(majors[locus]) != len(refseq[locus]):
                self.errorLog('%s WTMajor versus RefSeq length mismatch!' % locus,printerror=False); raise ValueError
        self.dict['WTMajor'] = majors
        self.dict['RefSeq'] = refseq
        ### ~ [4] Look for/process Mutant Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        if self.force() or not os.path.exists('%s.Mut.tdt' % self.baseFile()): self.parsePileup('Mut',self.getStr('MutPileup'),True)
        return True     # Setup successful
    except: self.errorLog('Problem during %s setup.' % self); return False  # Setup failed
def setup(self):  ### Main class setup method.
    '''
    Main class setup method.

    Creates the Database object with 'delimit=,' appended to the command list, runs
    self.splitMascot(), and runs self.iTRAQSamples() when the iTRAQ option is on and
    self.dict['Samples'] is non-empty.

    Returns True on success; logs the error and returns False on any exception.
    '''
    try:
        ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        dbcmd = self.cmd_list + ['delimit=,']
        self.obj['DB'] = rje_db.Database(self.log, dbcmd)
        self.splitMascot()
        if self.getBool('iTRAQ'):
            if self.dict['Samples']:
                self.iTRAQSamples()
        return True  # Setup successful
    except:
        self.errorLog('Problem during %s setup.' % self)
        return False  # Setup failed
def setup(self): ### Main class setup method.
    '''
    Main class setup method.

    [1] Creates the Database object and an empty 'ProDigIS' table keyed on AccNum+Protease, adding
        optional MinPepLen/NRPep fields, one field per integer 1..MaxPepLen and, when PepMWt is on, one
        field per i*100.0. A 'Source' table is loaded from the Source file if it exists, else made empty.
    [2] Loads every file in self.list['SeqFiles'] into one combined sequence list and records File and
        ProtMWt for each AccNum in the 'Source' table.
    [3] If self._peptideProbabilities() is true, adds the expectation fields to the 'ProDigIS' table.

    Returns None via the bare return after section [3]; returns False if an exception was caught.
    NOTE(review): indentation was lost in this copy. The bare return is assumed to sit at the top level
    of the try block, which makes section [4] unreachable - confirm against the original file.
    '''
    try:### ~ [1] Setup Database ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.obj['DB'] = rje_db.Database(self.log,self.cmd_list)
        db = self.db().addEmptyTable('ProDigIS',['AccNum','Protease','PepCount'],['AccNum','Protease'])
        if self.getInt('MinPepLen') > 0: db.addField('MinPepLen')
        if self.getBool('NRPep'): db.addField('NRPep')
        if rje.exists(self.getStr('Source')):
            fdb = self.db().addTable(self.getStr('Source'),mainkeys=['AccNum'],name='Source')
            fdb.addField('File')
            fdb.addField('ProtMWt')
        else: fdb = self.db().addEmptyTable('Source',['AccNum','File','ProtMWt'],['AccNum'])
        # Field names here are the integers 1..MaxPepLen (and floats i*100.0 below), not strings.
        for i in range(1,self.getInt('MaxPepLen')+1): db.addField(i)
        if self.getBool('PepMWt'):
            for i in range(1,self.getInt('MaxPepLen')+1): db.addField(i*100.0)
        ### ~ [2] Load Sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.obj['SeqList'] = rje_seq.SeqList(self.log,self.cmd_list+['seqin=None','autoload=F'])
        # fullseq and the SeqList's seq attribute are the same list object, so += below extends both.
        self.obj['SeqList'].seq = fullseq = []
        for seqfile in self.list['SeqFiles']:
            file = rje.baseFile(seqfile,True)
            seqlist = rje_seq.SeqList(self.log,['autofilter=T','gnspacc=T','seqnr=F']+self.cmd_list+['seqin=%s' % seqfile,'autoload=T'])
            fullseq += seqlist.seqs()
            for seq in seqlist.seqs():
                accnum = seq.getStr('AccNum')
                try:
                    # Update an existing Source entry; a non-empty File means the AccNum was seen before.
                    entry = fdb.data()[accnum]
                    if 'File' in entry and entry['File']: self.errorLog('%s found in %s AND %s!' % (accnum,entry['File'],file),printerror=False)
                    entry['File'] = file
                    entry['ProtMWt'] = seq.MWt()
                except:
                    # No existing entry (or any other failure above): add a new one.
                    entry = {'AccNum':accnum,'File':file,'ProtMWt':seq.MWt()}
                    fdb.addEntry(entry)
                self.deBug(fdb.dict['Data'][seq.getStr('AccNum')])
        self.printLog('#SEQ','%s sequences to analyse in total' % rje.iLen(fullseq))
        fdb.fillBlanks()
        ### ~ [3] Setup Peptide Probabilities ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        if self._peptideProbabilities():
            db.addField('LenExp','PepCount');
            if self.getBool('PepMWt'): db.addField('MWtExp','LenExp'); db.addField('Len7Exp','MWtExp')
            else: db.addField('Len7Exp','LenExp')
            db.addField('Len37','Len7Exp')
            if self.getBool('PepMWt'):
                db.addField('Len5','MWtExp'); db.addField('MWt5','Len5')
                db.addField('Len3','MWtExp'); db.addField('MWt3','Len3')
            else: db.addField('Len5','LenExp'); db.addField('Len3','LenExp')
        # NOTE(review): returns None rather than True; everything below is presumably disabled temp code.
        return
        ### ~ [4] Temp GABLAM Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        gdb = self.db().addTable('Chlam_Pos.vs.embl_bacteria.hitsum.tdt',['Qry'],name='GABLAM')
        ndb = self.db().addTable('Chlam_Neg.vs.embl_bacteria.hitsum.tdt',['Qry'],name='GNeg')
        self.db().mergeTables(gdb,ndb,overwrite=True,matchfields=True)
        gdb.renameField('Qry','AccNum')
        tmp = self.db().joinTables(name='blast',join=[('Source','AccNum'),('GABLAM','AccNum')],newkey=['AccNum','File'],keeptable=False)
        tmp.saveToFile()
        tmp.compress(['File'],default='mean')
        tmp.dropFields(['AccNum'])
        tmp.info['Name'] = 'blastsum'
        tmp.saveToFile()
    except: self.errorLog('Problem during %s setup.' % self); return False  # Setup failed
def setup(self):  ### Main class setup method.
    '''
    Main class setup method.

    Creates the Database object with 'tuplekeys=T' appended to the command list, falls back to the
    basefile 'diploidocus' when self.baseFile(return_none='') is empty, and logs the basefile in use.

    Returns True on success; logs the error and returns False on any exception.
    '''
    try:
        ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        dbcmd = self.cmd_list + ['tuplekeys=T']
        self.obj['DB'] = rje_db.Database(self.log, dbcmd)
        basefile_missing = not self.baseFile(return_none='')
        if basefile_missing:
            self.baseFile('diploidocus')
        basetxt = 'Output file basename: %s' % self.baseFile()
        self.printLog('#BASE', basetxt)
        return True  # Setup successful
    except:
        self.errorLog('Problem during %s setup.' % self.prog())
        return False  # Setup failed
def batchSummarise(self):  ### Batch run seqlist summarise on batchrun=LIST files and output table of results
    '''
    Batch run seqlist summarise on batchrun=LIST files and output table of results.

    Each file in self.list['BatchRun'] is loaded into its own SeqList and summarised; the returned
    dictionary is reformatted (GC dropped, GCPC/GapPC/MeanLength turned into fixed-precision strings)
    and added to a 'summarise' table keyed on File. An existing table is reused unless force is on.

    Returns True once the table has been saved. Any exception, including the ValueError raised when no
    batchrun files were given, is logged and False is returned.
    '''
    try:
        ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        batchfiles = self.list['BatchRun']
        if not batchfiles:
            raise ValueError('Need to provide batchrun=LIST files for summarise mode.')
        db = rje_db.Database(self.log, self.cmd_list)
        self.printLog('#BASE', db.baseFile())
        sdb = None
        if not self.force():
            sdb = db.addTable(mainkeys=['File'], name='summarise', expect=False)
        if not sdb:
            sdb = db.addEmptyTable('summarise', ['File'], ['File'])
        # Preferred left-to-right order for the summary fields that are present.
        fieldorder = 'SeqNum, TotLength, MinLength, MaxLength, MeanLength, MedLength, N50Length, L50Count, GapLength, GapPC, GCPC'.split(', ')
        ### ~ [2] Run Summarise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.printLog('#BATCH', 'Batch summarising %s input files' % rje.iLen(batchfiles))
        for seqfile in batchfiles:
            seqcmd = self.cmd_list + ['seqin=%s' % seqfile, 'autoload=T', 'summarise=F']
            seqdata = rje_seqlist.SeqList(self.log, seqcmd).summarise()
            if not seqdata:
                self.errorLog('Summarise failed for %s' % seqfile, printerror=False)
                continue
            # NOTE(review): GCPC is assumed to be reformatted only when GC is present - confirm nesting.
            if 'GC' in seqdata:
                seqdata.pop('GC')
                seqdata['GCPC'] = '%.2f' % seqdata['GCPC']
            if 'GapLength' in seqdata:
                gappc = 100.0 * seqdata['GapLength'] / seqdata['TotLength']
                seqdata['GapPC'] = '%.2f' % gappc
            seqdata['MeanLength'] = '%.1f' % seqdata['MeanLength']
            # Add the preferred fields first (in order), then whatever else the summary returned.
            ordered = [field for field in fieldorder if field in seqdata]
            for field in ordered + list(seqdata.keys()):
                if field not in sdb.fields():
                    sdb.addField(field)
            sdb.addEntry(seqdata)
        ### ~ [3] Output Summarise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        sdb.saveToFile()
        return True
    except:
        self.errorLog('%s.batchSummarise error' % self)
        return False
def setup(self):  ### Main class setup method.
    '''
    Main class setup method.

    Creates the Database object and adds an empty 'TimePoints' table to it, keyed on
    'TimePoint Name'.

    Returns True on success; logs the error and returns False on any exception.
    '''
    try:
        ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        db = rje_db.Database(self.log, self.cmd_list)
        self.obj['DB'] = db
        tpfields = ['TimePoint Name', 'TimePoint Description', 'Source URL', 'Year', 'yearUnit', 'month', 'day']
        tpfields += ['keyword1', 'keyword2', 'keyword3', 'keyword4', 'keyword5']
        db.addEmptyTable('TimePoints', tpfields, keys=['TimePoint Name'])
        return True  # Setup successful
    except:
        self.errorLog('Problem during %s setup.' % self)
        return False  # Setup failed
def setup(self):  ### Main class setup method.
    '''
    Main class setup method.

    Creates the Database object with 'tuplekeys=T' appended to the command list. When no basefile is
    set (self.baseFile(return_none='') is empty) and SeqIn is set, the basefile is taken from the
    SeqIn file name with its path stripped. The basefile in use is then logged.

    Returns True on success; logs the error and returns False on any exception.
    '''
    try:
        ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.obj['DB'] = rje_db.Database(self.log, self.cmd_list + ['tuplekeys=T'])
        if not self.baseFile(return_none=''):
            if self.getStrLC('SeqIn'):
                self.baseFile(rje.baseFile(self.getStr('SeqIn'), strip_path=True))
        self.printLog('#BASE', 'Output file basename: %s' % self.baseFile())
        # Report success explicitly: without this the method fell off the end and returned None,
        # which a caller testing the result cannot tell apart from a failed setup.
        return True  # Setup successful
    except:
        self.errorLog('Problem during %s setup.' % self.prog())
        return False  # Setup failed
def setup(self):  ### Main class setup method.
    '''
    Main class setup method.

    Loads each file in self.list['iTunes'] into its own table keyed on Location, drops the fields that
    iformat marks as 'del', applies iformat as the data format, and adds Album_Artist, Tracks and
    (with AddScore) Score fields. Each entry then has its text fields stripped of unexpected
    characters, Album_Artist derived from the Location path, and empty Plays/Skips set to 0.

    Returns True on success; logs the error and returns False on any exception.
    NOTE(review): iformat is not defined in this method - presumably a module-level dictionary of
    field -> format; verify. Indentation was lost in this copy, so the nesting below is inferred.
    '''
    try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        fixfields = ['Location', 'Name', 'Artist', 'Composer', 'Album']  # Fields cleaned character-by-character below
        db = self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
        #self.deBug(self.list['iTunes'])
        for ifile in self.list['iTunes']:
            #self.deBug(string.split(open(ifile,'r').readline(),'\t'))
            idb = db.addTable(ifile, mainkeys=['Location'], name=rje.baseFile(ifile, True))
            for field in iformat:
                if iformat[field] == 'del' and field in idb.fields(): idb.dropField(field)
            idb.dataFormat(iformat)
            idb.addField('Album_Artist', 'Album')
            idb.addField('Tracks', evalue=1)
            if self.getBool('AddScore'): idb.addField('Score', evalue=0)
            for entry in idb.entries():
                # Keep only alphanumerics and the listed punctuation in each text field.
                for field in fixfields:
                    newval = ''
                    for x in entry[field]:
                        if x.isalnum() or x in '\\/: -_()[].~$': newval += x
                    entry[field] = newval
                # Default to the track artist, then prefer the third-from-last element of the Location
                # path, using the first divider that splits it into more than two parts.
                entry['Album_Artist'] = entry['Artist']
                try:
                    for divider in ['\\\\', '\\', ':', '/']:
                        if len(string.split(entry['Location'], divider)) > 2:
                            entry['Album_Artist'] = string.split(entry['Location'], divider)[-3]
                            break
                except:
                    self.errorLog('!')
                    self.deBug(entry['Location'])
                if not entry['Plays']: entry['Plays'] = 0
                if not entry['Skips']: entry['Skips'] = 0
                if self.getBool('AddScore'):
                    # Score is only set when a rating exists; otherwise it keeps the field default.
                    if entry['My Rating']: entry['Score'] = (entry['My Rating'] - 60) / 20.0
            # Location values may have changed above, so rebuild the table keys.
            idb.remakeKeys()
        return True  # Setup successful
    except:
        self.errorLog('Problem during %s setup.' % self)
        return False  # Setup failed
def setup(self):  ### Main class setup method.
    '''
    Main class setup method.

    Creates the Database object, loads the GeneMap file into a 'GeneMap' table keyed on Gene and the
    Pairwise file into a 'PPI' table keyed on Hub+Spoke (both with datakeys='All'), then calls
    self.loadHHPID().

    Returns True on success; logs the error and returns False on any exception.
    '''
    try:
        ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
        # (file setting, key fields, table name) for each table, in load order
        tables = [
            ('GeneMap', ['Gene'], 'GeneMap'),
            ('Pairwise', ['Hub', 'Spoke'], 'PPI'),
        ]
        for setting, keyfields, tablename in tables:
            self.db().addTable(self.getStr(setting), mainkeys=keyfields, datakeys='All', name=tablename)
        self.loadHHPID()
        return True  # Setup successful
    except:
        self.errorLog('Problem during %s setup.' % self)
        return False  # Setup failed
def _setAttributes(self):  ### Sets Attributes of Object
    '''
    Declare the attribute names handled by this object and initialise them.

    Beyond the standard defaults this sets BootWeight=True, Monophyly=False and the numeric
    thresholds listed below, creates the Database object with 'tuplekeys=T' and 'basefile=taxmap'
    placed ahead of self.cmd_list, copies the Database basefile to this object, and fills the
    Classify and NwkList lists with the *.class and *.nwk files matched by glob.
    '''
    ### ~ Attribute name lists ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self.strlist = ['ProtDesc', 'TaxBase']
    self.boollist = ['BootWeight', 'Monophyly']
    self.numlist = ['BootFilter', 'MinBoot', 'MinScore', 'MinClass', 'MinSum', 'NoneBoot']
    self.listlist = ['Classify', 'NwkList', 'TaxFilter']
    self.intlist = []; self.filelist = []; self.dictlist = []; self.objlist = []
    ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self._setDefaults(str='None', bool=False, int=0, num=0.0, obj=None,
                      setlist=True, setdict=True, setfile=True)
    self.setStr({})
    self.setBool({'BootWeight': True, 'Monophyly': False})
    self.setInt({})
    numdefaults = {
        'BootFilter': 0.0,
        'MinBoot': 0.5,
        'MinClass': 1.0,
        'MinScore': 1.0,
        'MinSum': 10.0,
        'NoneBoot': 1.0,
    }
    self.setNum(numdefaults)
    # The fixed options come first and self.cmd_list is appended after them.
    dbcmd = ['tuplekeys=T', 'basefile=taxmap'] + self.cmd_list
    self.obj['DB'] = rje_db.Database(self.log, dbcmd)
    self.baseFile(self.obj['DB'].baseFile())
    # Default file lists: whatever matches these wildcards when this method runs.
    for listkey, wildcard in (('Classify', '*.class'), ('NwkList', '*.nwk')):
        self.list[listkey] = glob.glob(wildcard)
    ### ~ Other Attributes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self._setForkAttributes()  # Delete if no forking
def _setAttributes(self):  ### Sets Attributes of Object
    '''
    Declare the attribute names handled by this object and initialise them.

    No named attributes are registered. Defaults are applied, forking attributes are set up, and a
    Database object plus a SeqList (with 'autoload=T' and 'dna=T' placed ahead of self.cmd_list) are
    stored in self.obj.
    '''
    ### ~ Attribute name lists (all empty) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self.strlist = []; self.boollist = []; self.intlist = []; self.numlist = []
    self.listlist = []; self.dictlist = []; self.objlist = []
    ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    defaults = dict(str='None', bool=False, int=0, num=0.0, obj=None, setlist=True, setdict=True)
    self._setDefaults(**defaults)
    ### ~ Forking and helper objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self._setForkAttributes()  # Delete if no forking
    self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
    seqcmd = ['autoload=T', 'dna=T'] + self.cmd_list
    self.obj['SeqList'] = rje_seq.SeqList(self.log, seqcmd)
def _setAttributes(self):  ### Sets Attributes of Object
    '''
    Declare the attribute names handled by this object and initialise them.

    Registers the string attribute ExOpt and the boolean attributes MemSaver, Cleanup and GZip
    (defaults False, False and True), sets up forking attributes, and stores a Database object
    created with 'tuplekeys=T' appended to the command list.
    '''
    ### ~ Attribute name lists ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self.strlist = ['ExOpt']
    self.boollist = ['MemSaver', 'Cleanup', 'GZip']
    # Each remaining list gets its own fresh empty list (they must not share one object).
    for listname in ('intlist', 'numlist', 'filelist', 'listlist', 'dictlist', 'objlist'):
        setattr(self, listname, [])
    ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self._setDefaults(str='None', bool=False, int=0, num=0.0, obj=None,
                      setlist=True, setdict=True, setfile=True)
    self.setStr({})
    booldefaults = {'MemSaver': False, 'Cleanup': False, 'GZip': True}
    self.setBool(booldefaults)
    self.setInt({})
    self.setNum({})
    ### ~ Forking and helper objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self._setForkAttributes()  # Delete if no forking
    dbcmd = self.cmd_list + ['tuplekeys=T']
    self.obj['DB'] = rje_db.Database(self.log, dbcmd)
def setup(self):  ### Main class setup method.
    '''
    Main class setup method.

    This will load sequences into a SeqList object, gaps into a 'gaps' database table, and check or
    generate a PAF file from the mapped long reads.

    seqin=FILE must be set and must exist. The basefile defaults to the SeqIn name with its path
    stripped. 'gapstats=F' is appended to the command list when <seqbase>.gaps.tdt is found and force
    is off, otherwise 'gapstats=T'. The PAF setting defaults to <basefile>.paf, and the PAF is
    (re)generated when forced or missing.

    Returns True on success; logs the error and returns False on any exception, including the
    ValueError/IOError raised here for missing input.
    '''
    try:
        ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
        if not self.getStrLC('SeqIn'):
            raise ValueError('seqin=FILE must be set')
        if not rje.exists(self.getStr('SeqIn')):
            raise IOError('Unable to read seqin=FILE: "{0}"'.format(self.getStr('SeqIn')))
        seqbase = rje.baseFile(self.getStr('SeqIn'), strip_path=True)
        if not self.getStrLC('Basefile'):
            self.baseFile(seqbase)
        # Decide the gapstats option before the sequence object is created from the command list.
        gapfound = rje.checkForFiles(filelist=['.gaps.tdt'], basename=seqbase, log=self.log)
        if gapfound and not self.force():
            gapcmd = 'gapstats=F'
        else:
            gapcmd = 'gapstats=T'
        self.cmd_list.append(gapcmd)
        self.seqinObj()  # Return value is not needed here
        gapfile = '%s.gaps.tdt' % seqbase
        gapdb = self.db().addTable(gapfile, mainkeys=['seqname', 'start', 'end'], name='gaps', ignore=[], expect=True)
        gapdb.dataFormat({'start': 'int', 'end': 'int'})
        ### ~ [2] PAF File ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        if not self.getStrLC('PAF'):
            self.setStr({'PAF': self.baseFile() + '.paf'})
        pfile = self.getStr('PAF')
        if self.force() or not rje.exists(pfile):
            rje_paf.PAF(self.log, self.cmd_list).longreadMinimapPAF(pfile)
        # Final check covers both a pre-existing PAF and one that should just have been made.
        if not rje.exists(self.getStr('PAF')):
            raise IOError('Unable to read or create PAF file: {0}'.format(pfile))
        return True
    except:
        self.errorLog('Problem during %s setup.' % self.prog())
        return False  # Setup failed
def _setAttributes(self):  ### Sets Attributes of Object
    '''
    Declare the attribute names handled by this object and initialise them.

    Registers six string attributes and the TDTKeys list, applies the standard defaults, sets the
    field-name and Reformat string defaults listed below, sets TDTKeys to Qry/Hit/AlnNum, stores a
    Database object in self.obj['DB'], and sets up forking attributes.
    '''
    ### ~ Attribute name lists ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    self.strlist = ['BegField', 'EndField', 'QueryField', 'Reformat', 'TargetField', 'TDTFile']
    self.listlist = ['TDTKeys']
    self.boollist = []; self.intlist = []; self.numlist = []
    self.filelist = []; self.dictlist = []; self.objlist = []
    ### ~ Default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    defaults = dict(str='None', bool=False, int=0, num=0.0, obj=None,
                    setlist=True, setdict=True, setfile=True)
    self._setDefaults(**defaults)
    # NOTE(review): EndField defaults to 'HitStart' while BegField is 'QryStart' - confirm intended.
    strdefaults = {
        'BegField': 'QryStart',
        'EndField': 'HitStart',
        'QueryField': 'Qry',
        'Reformat': 'GFF3',
        'TargetField': 'Hit',
    }
    self.setStr(strdefaults)
    self.setBool({})
    self.setInt({})
    self.setNum({})
    self.list['TDTKeys'] = ['Qry', 'Hit', 'AlnNum']
    ### ~ Helper objects and forking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    database = rje_db.Database(self.log, self.cmd_list)
    self.obj['DB'] = database
    self._setForkAttributes()  # Delete if no forking
def setup(self):  ### Main class setup method. |0.0|
    '''
    Main class setup method.

    Creates the Database object, sets the basefile to 'ingolia' and loads one table from
    <basefile>.<tab>.csv for each of the nine supplementary table codes. Tables are named via the
    names mapping (falling back to the code itself); 'pauses' and 'starts' are loaded with explicit
    key fields and all others with the addTable defaults.

    Returns True on success; logs the error and returns False on any exception.
    '''
    try:
        ### ~ [1] Load tables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
        self.baseFile('ingolia')
        names = {
            's1a': 'synthesis',
            's1c': 'efficiency',  # Not sure what Table S1 data is!
            's2a': 'genes',
            's2b': 'pauses',
            's2c': 'stops',
            's3': 'starts',
            's4': 'lincRNA',
        }
        # Tables that need explicit key fields
        keyfields = {
            'pauses': ['UCSC ID', 'Codon'],
            'starts': ['knownGene', 'Init Codon [nt]'],
        }
        tabs = ['s1a', 's1b', 's1c', 's1d', 's2a', 's2b', 's2c', 's3', 's4']
        for tab in tabs:
            name = names.get(tab, tab)
            dfile = '%s.%s.csv' % (self.baseFile(), tab)
            if name in keyfields:
                self.db().addTable(dfile, mainkeys=keyfields[name], name=name)
            else:
                self.db().addTable(dfile, name=name)
        return True  # Setup successful
    except:
        self.errorLog('Problem during %s setup.' % self)
        return False  # Setup failed
def setup(self):  ### Main class setup method.
    '''
    Main class setup method.

    [1]  Creates the Database object, gives it this object's basefile and sets
         self.list['Accuracy'] to [0, 1.0 - ErrPerBase].
    [1a] Makes sure SMRTUnits is one of reads/gb/mb: a unit is suggested from the size of SMRTReads
         (<10 -> Gb, >10000 -> reads, else Mb) and either accepted or replaced by a user choice.
         Gb/Mb values are then converted into a read count using AvRead.
    [1b] Converts self.list['XnList'] into a sorted list of integers, skipping blanks, 0 and 1, and
         logging anything that cannot be converted.

    Returns True on success; logs the error and returns False on any exception.
    '''
    try:
        ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
        self.db().basefile(self.basefile())
        self.list['Accuracy'] = [0, 1.0 - self.getNum('ErrPerBase')]
        ## ~ [1a] SMRTReads ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
        while self.getStrLC('SMRTUnits') not in ['reads', 'gb', 'mb']:
            # Fill in the rejected value: the placeholder used to be left unformatted, so the prompt
            # and log showed a literal "%s" instead of the unrecognised units.
            txt = 'SMRTUnits "%s" not recognised' % self.getStr('SMRTUnits')
            if self.getNum('SMRTReads') < 10: smrtunits = 'Gb'
            elif self.getNum('SMRTReads') > 10000: smrtunits = 'reads'
            else: smrtunits = 'Mb'
            if self.i() < 0 or rje.yesNo('%s: switch to (%s) %s?' % (txt, self.getNum('SMRTReads'), smrtunits)):
                self.setStr({'SMRTUnits': smrtunits})
            elif self.i() > 0:
                self.setStr({'SMRTUnits': rje.choice('SMRTUnits (reads/Gb/Mb)?')})
            self.printLog('#UNITS', '%s => %s' % (txt, self.getStr('SMRTUnits')))
        if self.getStrLC('SMRTUnits') in ['gb', 'mb']:
            # Convert a total yield in Gb/Mb into a number of reads of average length AvRead.
            smrttotal = self.getNum('SMRTReads') * {'gb': 1e9, 'mb': 1e6}[self.getStrLC('SMRTUnits')]
            txt = '%s %s @ %.3f kb/read' % (self.getNum('SMRTReads'), self.getStr('SMRTUnits'), self.getNum('AvRead') / 1000.0)
            self.setNum({'SMRTReads': smrttotal / self.getNum('AvRead')})
            txt += ' => %s reads' % rje.iStr(int(self.getNum('SMRTReads')))
            self.printLog('#READS', txt)
        ## ~ [1b] XnList ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
        xnlist = []
        for xn in self.list['XnList']:
            if xn == '': continue
            try:
                ixn = int(xn)
                # Log any value that was changed by the integer conversion.
                if xn not in [ixn, '%d' % ixn]: self.printLog('#XN', '"%s" -> %dX' % (xn, ixn))
                if ixn == 0: self.printLog('#XN', 'No point in 0X output: use 1-%Coverage.')
                elif ixn == 1: self.printLog('#XN', 'No point in 1X output: use %Coverage.')
                else: xnlist.append(ixn)
            except:
                self.errorLog('Could not process %s as part of XnList. (Integers only.)' % xn)
        xnlist.sort()
        if xnlist:
            # Turns the printed list, e.g. [2, 5], into "2X, ... 5X" by replacing commas and trimming brackets.
            self.printLog('#XN', 'XnList: %sX.' % string.join(string.split('%s' % xnlist, ','), 'X, ')[1:-1])
        self.list['XnList'] = xnlist
        return True  # Setup successful
    except:
        self.errorLog('Problem during %s setup.' % self.prog())
        return False  # Setup failed
def run(self):  ### Main run method
    '''
    Main run method.

    [1] Loads the InFile (prompting for another name while it does not exist) as a tab-delimited table
        named 'SPF.Mod', then tidies the seven Level_N taxonomy columns of every entry: unassigned or
        unclassified placeholders are rewritten relative to the nearest named ancestor, taxa found
        under more than one parent are renamed with a numeric suffix, and levels whose name (after
        the 3-character prefix) repeats the level above are reported as duplicates.
    [2] Saves the modified table without its first field.
    [3] Repeatedly compresses the table on a shrinking list of levels, saving one *.spf file per level.

    Returns None, or the result of self.printLog() if the user quits at the file prompt. Any exception
    is logged with self.zen() and re-raised.
    NOTE(review): indentation was lost in this copy, so the nesting inside the level loop is inferred;
    confirm against the original file before relying on it.
    '''
    try:  ### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        infile = self.getStr('InFile')
        while not rje.exists(infile):
            infile = rje.choice('File "%s" not found. Input file name? (Blank to quit):' % infile)
            if not infile: return self.printLog('#QUIT', 'Execution terminated!')
        db = rje_db.Database(self.log, self.cmd_list)
        db.basefile(rje.baseFile(infile))
        sdb = db.addTable(infile, mainkeys='#', delimit='\t', name='SPF.Mod')
        # One-letter rank prefix used for each level column, e.g. Level_1 values start 'k__'.
        levels = {
            'Level_1': 'k',
            'Level_2': 'p',
            'Level_3': 'c',
            'Level_4': 'o',
            'Level_5': 'f',
            'Level_6': 'g',
            'Level_7': 's'
        }
        # Example rows:
        # k__Bacteria p__Proteobacteria c__Alphaproteobacteria o__Rhodospirillales f__Rhodospirillaceae g__ s__ denovo44
        # Unassigned unclassified unclassified unclassified unclassified unclassified unclassified denovo49
        ### ~ [1] Modify Text ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        dupnames = []   # (taxon, level-above taxon) pairs sharing the same name after the prefix
        parents = {}  # Parent for each term
        renamed = []    # Taxa renamed for having multiple parents; the count gives the numeric suffix
        ex = 0.0
        etot = sdb.entryNum()
        for entry in sdb.entries():
            self.progLog('\r#SPF', 'Modifying SPF content: %.1f%%' % (ex / etot))
            ex += 100.0     # Incremented by 100 per entry so that ex / etot is a percentage
            taxon = ''      # Value of the previous (higher) level for this entry
            parent = ''     # Final value assigned to the previous level for this entry
            #self.debug(entry)
            for lvl in [
                    'Level_1', 'Level_2', 'Level_3', 'Level_4', 'Level_5',
                    'Level_6', 'Level_7'
            ]:
                entry[lvl] = string.replace(entry[lvl], 'unidentified', 'unclassified')
                #entry[lvl] = string.replace(entry[lvl],'Incertae_sedis','Incertae_sedis-%s' % levels[lvl])
                null = '%s__' % levels[lvl]     # Bare prefix for this level, e.g. 'g__'
                #self.bugPrint(null)
                #self.bugPrint(entry[lvl])
                # Placeholder values get a name built from the level above.
                if entry[lvl] in [
                        null, 'Unassigned', 'unclassified',
                        '%sunclassified' % null,
                        '%sunidentified' % null,
                        '%sunculturedfungus' % null,
                        '%sIncertae_sedis' % null,
                        '%sunclassified_sp.' % null
                ]:
                    if not taxon or taxon.endswith('unclassified'): entry[lvl] = '%sunclassified' % null
                    #elif taxon.endswith('unassigned)'): entry[lvl] = '%s%s' % (null,taxon[3:])
                    #elif taxon.endswith('unassigned)'): entry[lvl] = '%s(%s;%s-unassigned)' % (null,string.split(taxon,'(')[1][:-1],levels[lvl])
                    # Level above is already '...unassigned)': extend its bracketed list with this level.
                    elif taxon.endswith('unassigned)'): entry[lvl] = '%s%s;%s-unassigned)' % (null, taxon[3:][:-1], levels[lvl])
                    else: entry[lvl] = '%s%s(%s-unassigned)' % (null, taxon[3:], levels[lvl])
                # NOTE(review): assumed to apply to every level, not just the placeholders above.
                if entry[lvl] in parents:
                    #self.debug(parents[entry[lvl]])
                    # Known taxon: reuse the name previously assigned for this parent.
                    if parent in parents[entry[lvl]]: entry[lvl] = parents[entry[lvl]][parent]
                    else:
                        # Same taxon name under a new parent: rename with a numeric suffix.
                        self.bugPrint(entry[lvl])
                        self.bugPrint(parents[entry[lvl]])
                        renamed.append(entry[lvl])
                        newtax = '%s%d' % (entry[lvl], renamed.count(entry[lvl]))
                        self.warnLog('%s had multiple parents (%s & %s) -> %s' % (entry[lvl], string.join(parents[entry[lvl]], '|'), parent, newtax))
                        parents[newtax] = {parent: newtax}
                        parents[entry[lvl]][parent] = newtax
                        entry[lvl] = newtax
                        self.deBug(parents[entry[lvl]])
                elif parent: parents[entry[lvl]] = {parent: entry[lvl]}
                parent = entry[lvl]
                # Compare names with the first three characters (presumably the 'x__' prefix) removed.
                if entry[lvl][3:] == taxon[3:]:
                    if (entry[lvl], taxon) not in dupnames: dupnames.append((entry[lvl], taxon))
                #self.bugPrint(entry[lvl])
                taxon = entry[lvl]
            #self.debug(entry)
        #self.debug(parents)
        self.printLog('\r#SPF', 'Modifying SPF content complete.')
        dupnames.sort()
        for (dupA, dupB) in dupnames:
            self.warnLog('Duplicate taxa names: %s & %s' % (dupA, dupB))
        ### ~ [2] Save to file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        sdb.saveToFile(savefields=sdb.list['Fields'][1:])    # All fields except the first
        ### ~ [3] Compress to different taxonomic levels ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        compress = [
            'Level_1', 'Level_2', 'Level_3', 'Level_4', 'Level_5', 'Level_6',
            'Level_7', '#'
        ]
        dump = compress.pop(-1)     # Field most recently removed from the compression key
        rules = {'Observation Ids': 'list', dump: 'str'}
        sdb.dropField('Observation Ids')
        while compress:
            # Compress on the remaining levels, save, then drop the lowest level for the next pass.
            sdb.compress(compress, rules=rules, default='sum', best=[], joinchar='|')
            #if dump == '#': sdb.dropField(dump)
            sdb.saveToFile('%s.SPF.%s.%s.spf' % (rje.baseFile(infile), compress[-1], levels[compress[-1]]))
            dump = compress.pop(-1)
            rules[dump] = 'list'
        return
    except:
        self.errorLog(self.zen())
        raise  # Delete this if method error not terrible