def BLAST(seq, seqs, evalue=10):
    """Search `seqs` for hits to `seq` using the NCBI BLAST+ tools.

    A command list is assembled and handed to `run`:

    1. `makeblastdb` over ``seqs.get_file('fa')`` (skipped when an index
       file is already found, see the review note below),
    2. the search program, writing subject ids (``-outfmt "6 sseqid"``) to
       ``res_file + '-out'``,
    3. `blastdbcmd`, which pulls those ids out of the database into
       ``res_file + '.fa'``.

    Parameters:
        seq:    query; must provide ``get_file('fa')``. Treated as
                nucleotide when it is an `NASeq`, otherwise as protein.
        seqs:   database sequences; must be indexable and provide
                ``get_file('fa')``. Only ``seqs[0]`` is inspected to decide
                whether the database is nucleotide or protein.
        evalue: passed to the search program as ``-evalue``.

    Returns:
        A `SeqList` built from an open read handle on ``res_file + '.fa'``.
        The handle is not closed here.
    """
    lib_dir = temp('blast-' + str(id(seqs)))
    res_file = temp('blast-' + str(id(seqs)), str(id(seq)))
    dbtype = 'nucl' if isinstance(seqs[0], NASeq) else 'prot'
    seqtype = 'nucl' if isinstance(seq, NASeq) else 'prot'
    db_fasta = seqs.get_file('fa')

    c = []
    # NOTE(review): makeblastdb is given `-out lib_dir`, so its index files
    # presumably end up at lib_dir + '.phr' / '.nhr' rather than at the
    # paths tested here -- confirm against what temp() returns. The check
    # is kept as it was; at worst the database is rebuilt on every call.
    if not op.exists(op.join(lib_dir, db_fasta + '.phr')) \
            and not op.exists(op.join(lib_dir, db_fasta + '.nhr')):
        c.append([app('BLAST', 'makeblastdb'), '-in', db_fasta,
                  '-out', lib_dir, '-parse_seqids', '-dbtype', dbtype])

    # Keyed by (database type, query type).  blastx translates a nucleotide
    # *query* against a protein database; tblastn searches a protein query
    # against a translated nucleotide *database*.  The two were previously
    # swapped, so mixed-type searches invoked the wrong program.
    blprog = {
        ('nucl', 'nucl'): 'blastn',   # TBLASTX
        ('nucl', 'prot'): 'tblastn',
        ('prot', 'nucl'): 'blastx',
        ('prot', 'prot'): 'blastp',   # PSIBLAST, PHIBLAST
    }[(dbtype, seqtype)]

    hits_file = res_file + '-out'
    c.append([app('BLAST', blprog), '-db', lib_dir,
              '-query', seq.get_file('fa'), '-outfmt', '"6 sseqid"',
              '-out', hits_file, '-evalue', str(evalue)])
    c.append([app('BLAST', 'blastdbcmd'), '-db', lib_dir,
              '-entry_batch', hits_file, '-out', res_file + '.fa'])
    run(c)
    return SeqList(open(res_file + '.fa', 'r'))
def Trimmomatic(seqs, headcrop=None, leading=None, trailing=None, slidingwin=None, minlen=None, illuminaclip=None):
    """Build and run a Trimmomatic command over the FASTQ file(s) of `seqs`.

    Parameters:
        seqs:         a `PairedFileSeqList` (paired-end run, class suffix
                      'PE') or a `SeqList` (single-end run, suffix 'SE').
        illuminaclip: object providing ``get_file('fa')``; emitted as
                      ``ILLUMINACLIP:<file>:2:40:15``.
        leading:      emitted as ``LEADING:<value>``.
        trailing:     emitted as ``TRAILING:<value>``.
        slidingwin:   indexable; emitted as
                      ``SLIDINGWINDOW:<slidingwin[0]>:<slidingwin[1]>``.
        headcrop:     emitted as ``HEADCROP:<value>``.
        minlen:       emitted as ``MINLEN:<value>``.

    Any option left as None adds nothing to the command.  Steps are placed
    on the command line in the order listed above.

    Returns:
        Paired-end: a `PairedFileSeqList` over the two '.trim.prd.fq'
        outputs (the '.trim.uprd.fq' outputs are passed to the command but
        not returned).  Single-end: a `SeqList` over the '.trim.fq' output.

    Raises:
        TypeError: if `seqs` is neither a `PairedFileSeqList` nor a
        `SeqList`.
    """
    if isinstance(seqs, PairedFileSeqList):
        mode = 'PE'
        f1 = seqs._f1.get_file('fq')
        f2 = seqs._f2.get_file('fq')
        # Two inputs first, then a paired/unpaired output pair per input.
        io_files = [f1, f2]
        for source in (f1, f2):
            io_files.append(source + '.trim.prd.fq')
            io_files.append(source + '.trim.uprd.fq')
    elif isinstance(seqs, SeqList):
        mode = 'SE'
        f1 = seqs.get_file('fq')
        io_files = [f1, f1 + '.trim.fq']
    else:
        raise TypeError('Need a PairedFileSeqList, FileSeqList or SeqList.')

    phred_flag = '-phred33'  # TODO: determine this from the data

    command = [
        app('JAVA', 'java'),
        '-classpath',
        app('TRIMMOMATIC', 'trimmomatic-0.22.jar'),
        'org.usadellab.trimmomatic.Trimmomatic' + mode,
        phred_flag,
    ]
    command.extend(io_files)

    if illuminaclip is not None:
        # sane defaults?
        command.append('ILLUMINACLIP:' + str(illuminaclip.get_file('fa')) + ':2:40:15')

    for prefix, setting in (('LEADING:', leading), ('TRAILING:', trailing)):
        if setting is not None:
            command.append(prefix + str(setting))

    if slidingwin is not None:
        command.append('SLIDINGWINDOW:' + str(slidingwin[0]) + ':' + str(slidingwin[1]))

    for prefix, setting in (('HEADCROP:', headcrop), ('MINLEN:', minlen)):
        if setting is not None:
            command.append(prefix + str(setting))

    run([command])

    if mode != 'PE':
        return SeqList(f1 + '.trim.fq')
    return PairedFileSeqList(f1 + '.trim.prd.fq', f2 + '.trim.prd.fq')