def perform_alignment(self, sra_object, out_suffix="_bowtie2", out_dir="", objectid="NA"):
    """Align the reads of sra_object via self.run and convert the result to a sorted bam.

    Parameters
    ----------
    sra_object: SRA object
        An object of type SRA. The path to fastq files will be obtained
        from this object.
    out_suffix: string
        Suffix for the output sam file
    out_dir: string
        Directory to save the results. Default value is sra_object.directory
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports. When left
        empty or at the default "NA", the SRR accession of sra_object is
        used instead.

    :return: The value returned by Samtools.sam_sorted_bam for the produced
             sam file (the sorted bam path), or an empty string when the
             alignment command fails or the sam file is missing.
    :rtype: string
    """
    if not out_dir:
        out_dir = sra_object.directory
    elif not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)

    # Output sam file: <out_dir>/<accession><suffix>.sam
    out_sam_file = os.path.join(
        out_dir, sra_object.srr_accession + out_suffix + ".sam")

    # Choose the read-input options according to the library layout.
    if sra_object.layout == 'PAIRED':
        internal_kwargs = {
            "-1": sra_object.fastq_path,
            "-2": sra_object.fastq2_path,
            "-S": out_sam_file,
        }
    else:
        internal_kwargs = {"-U": sra_object.fastq_path, "-S": out_sam_file}

    # Honour a caller-supplied id; the default "NA" keeps the previous
    # behaviour of tagging the command with the SRR accession.
    if objectid and objectid != "NA":
        run_id = objectid
    else:
        run_id = sra_object.srr_accession

    status = self.run(None, objectid=run_id, target=out_sam_file,
                      **internal_kwargs)
    if not status:
        return ""

    # A missing sam file is only tolerated when _dryrun is set.
    if not pu.check_files_exist(out_sam_file) and not _dryrun:
        return ""

    # Convert to bam before returning.
    return tools.Samtools().sam_sorted_bam(out_sam_file)
def test_samtools():
    """Check that Samtools can produce a sorted bam from a sam and merge two bams."""
    samtools = tools.Samtools()

    # sam -> sorted bam, written under the test directory
    sorted_bam = samtools.sam_sorted_bam(testVars.hisatSam,
                                         out_dir=testVars.testDir)
    print("check:" + sorted_bam)
    sorted_exists = pu.check_files_exist(sorted_bam)
    assert sorted_exists == True, "Failed to convert sam to sorted bam"

    # merge the two pre-sorted bams; "-f" is forwarded with an empty value
    merge_options = {"-f": ""}
    merged_bam = samtools.merge_bam(testVars.hisatSortedBam,
                                    testVars.starSortedBam,
                                    out_dir=testVars.testDir,
                                    **merge_options)
    merged_exists = pu.check_files_exist(merged_bam)
    assert merged_exists == True, "Failed to merge bam"
def test_pipeline1():
    """Run an end-to-end pipeline: download, QC, alignment, assembly and quantification.

    Relies on module-level names not defined in this function (srr,
    workingDir, fq1, fq2, testVars and the imported tool modules).
    """
    # --- download and fastq extraction ---
    sraOb = sra.SRA(srr, workingDir)
    st = sraOb.download_sra()
    assert st == True, "SRA download failed"
    st = sraOb.run_fasterqdump(delete_sra=False,
                               **{
                                   "-e": "8",
                                   "-f": "",
                                   "-t": workingDir
                               })
    assert st == True, "fqdump failed"

    # --- quality control with bbduk ---
    bbdOpts = {
        "ktrim": "r",
        "k": "23",
        "mink": "11",
        "qtrim": "'rl'",
        "trimq": "10",
        "--": ("-Xmx2g", ),
        "ref": testVars.bbdukAdapters
    }
    bbdOb = qc.BBmap(**bbdOpts)
    st = sraOb.perform_qc(bbdOb)
    assert st == True, "bbduk failed"

    # --- quality control with trimgalore ---
    tgOpts = {
        "--cores": "10",
        "-o": testVars.testDir,
        "--paired": "",
        "--": (fq1, fq2)
    }
    tg = qc.Trimgalore(**tgOpts)
    st = sraOb.perform_qc(tg)
    assert st == True, "tg failed"

    # --- bowtie2: an empty index must be reported as missing, then built ---
    bt = mapping.Bowtie2(bowtie2_index="")
    assert bt.check_index() == False, "Failed bowtie2 check_index"
    st = bt.build_index(testVars.testDir + "/btIndex", "bowtieIndex",
                        testVars.genome)
    assert st == True, "Failed to build bowtie2 index"
    st = bt.perform_alignment(sraOb)
    assert os.path.isfile(st) == True, "bowtie failed"

    # --- hisat2: build index and align using the SRA object ---
    hsOpts = {"--dta-cufflinks": "", "-p": "8"}
    hs = mapping.Hisat2(hisat2_index="", **hsOpts)
    st = hs.build_index(testVars.testDir, "hisatindex", testVars.genome)
    assert st == True, "Failed to build hisat2 index"
    st = hs.perform_alignment(sraOb)
    assert os.path.isfile(st) == True, "hisat failed"
    hisatSam = st

    # --- sam -> sorted bam, keeping intermediate files ---
    samOb = tools.Samtools(**{"-@": "8"})
    bam = samOb.sam_sorted_bam(hisatSam, delete_sam=False, delete_bam=False)
    assert os.path.isfile(bam) == True, "sam to bam failed"

    # --- assembly ---
    stie = assembly.Stringtie(reference_gtf=testVars.gtf)
    result = stie.perform_assembly(bam, out_dir=testVars.testDir)
    assert pu.check_files_exist(result) == True, "Failed stringtie"

    tr = assembly.Trinity()
    tr_out = tr.perform_assembly(sraOb, verbose=True)
    # The message previously said "Failed stringtie" (copy-paste slip),
    # which would point at the wrong tool when Trinity fails.
    assert pu.check_files_exist(tr_out) == True, "Failed trinity"

    # --- quantification with kallisto ---
    kl = quant.Kallisto(kallisto_index="")
    assert kl.check_index() == False, "Failed kallisto check_index"
    st = kl.build_index(index_path=testVars.testDir + "/kallistoIndex",
                        index_name="kalIndex",
                        fasta=testVars.cdna)
    assert st == True, "Failed to build kallisto index"
    st = kl.perform_quant(sraOb)
    assert os.path.isdir(st) == True, "Failed to run kallisto"

    # --- quantification with salmon ---
    sl = quant.Salmon(salmon_index="")
    assert sl.check_index() == False, "Failed salmon check_index"
    st = sl.build_index(index_path=testVars.testDir + "/salmonIndex",
                        index_name="salIndex",
                        fasta=testVars.cdna)
    assert st == True, "Failed to build salmon index"
    st = sl.perform_quant(sraOb)
    assert os.path.isdir(st) == True, "Failed to run salmon"
# NOTE(review): the original text had lost its line breaks; the nesting below
# is reconstructed from the statement order — confirm against the source file.

# Build the index, then report whether the aligner object sees one.
if hs.build_index(workingDir + "/maizeIndex", "maizeInd", GENOME,
                  **hisat2_buildArgs):
    print("Indexing done.")

if hs.check_index():
    print("Index {} exists".format(hs.hisat2_index))

# Align every SRA object. Failed alignments (falsy return) are skipped, so the
# accession is recorded next to each sam file to keep the two in step.
samList = []
samAccessions = []
for ob in sraObjects:
    print("Processing {}...".format(ob.srr_accession))
    thisSam = hs.perform_alignment(ob, **{"-p": "16"})
    if thisSam:
        samList.append(thisSam)
        samAccessions.append(ob.srr_accession)
print("Alignment done!! Sam files:" + ",".join(samList))

# Convert each sam to a sorted bam. Previously a counter indexed sraObjects
# directly, which attached the wrong accession to every sam that followed a
# skipped alignment.
samOb = tools.Samtools(**{"-@": "16"})
bamList = []
for sam, accession in zip(samList, samAccessions):
    print("Processing:" + sam)
    thisBam = samOb.sam_sorted_bam(sam,
                                   delete_sam=True,
                                   delete_bam=True,
                                   objectid=accession)
    if thisBam:
        bamList.append(thisBam)
print("Sorted bam files:" + ",".join(bamList))

st = assembly.Stringtie()
gtfList = []
dm.build_index(infa,"diamondDB",out_dir="/Users/usingh/work/urmi/tests/mikado/dout",threads=8) listFile="/Users/usingh/work/urmi/tests/mikado/list.txt" genome="/Users/usingh/work/urmi/tests/mikado/chr5.fas" mode="permissive" scoring="plants.yaml" junctions="/Users/usingh/work/urmi/tests/mikado/junctions.bed" mk=tools.Mikado() mk.runMikadoFull(listFile,genome,mode,scoring,junctions,"mkconf",infa,dm,8,out_dir="/Users/usingh/work/urmi/tests/mikado/pyrout",verbose=False) """ #new tests # test samtools sam=testDir+"/test_files/athaliana/mapping/hisat2.sam" sm=tools.Samtools(threads=5) bam1=sm.sam_to_bam(sam,out_suffix="test2",threads=3, delete_sam=False,verbose=True,quiet=False,logs=True,objectid="NA",**{"-@":"4"}) print(bam1) bam2=sm.sort_bam(bam1,out_suffix="test2",threads=2,delete_bam=False,verbose=True,quiet=False,logs=True,objectid="NA") print(bam2) #bam=sm.sam_sorted_bam(sam,delete_sam=False,delete_bam=False) txd=tools.Transdecoder() infa="/Users/usingh/work/urmi/tests/txd/test.fa" outdir=txd.run_transdecoder_longorfs(infa,out_dir="/Users/usingh/work/urmi/tests/txd/mtout1") print(outdir) poutdir="/Users/usingh/work/urmi/tests/txd/mypredout" predout=txd.run_transdecoder_predict(infa,longorfs_dir=outdir,out_dir=poutdir) print(predout)
"mink": "11", "qtrim": "'rl'", "trimq": "10", "ref": testVars.bbdukAdapters } bbdOb = qc.BBmap(None, **bbdOpts) tg = qc.Trimgalore() bt = mapping.Bowtie2(index=testVars.testDir + "/btIndex", genome=testVars.genome) hsOpts = {"--dta-cufflinks": "", "-p": "8"} hs = mapping.Hisat2(index=testVars.testDir + "/hisatindex", genome=testVars.genome, **hsOpts) star = mapping.Star(index=os.path.join(testVars.testDir, "starIndex"), genome=testVars.genome) samOb = tools.Samtools() stie = assembly.Stringtie() kl = quant.Kallisto(index=testVars.testDir + "/kallistoIndex/kalIndex", transcriptome=testVars.cdna) sl = quant.Salmon(index=testVars.testDir + "/salmonIndex/salIndex", transcriptome=testVars.cdna_big) #sra ob sraOb = sra.SRA(srr, workingDir) st = sraOb.fastq_exists() assert st == True, "fasterq-dump failed" def test_pipeline1(): st = sraOb.trim(bbdOb).align(hs).assemble(stie).quant(kl) assert st != None, "pipeline 1 failed"
# NOTE(review): the original text had lost its line breaks; comment boundaries
# below are reconstructed — confirm against the source file.

#sraOb.localfastqPath=unMappedReads

# Create the hisat2 aligner object against an existing index.
hsOpts = {
    "--dta-cufflinks": "",
    "-p": "12",
    "--mp": "1,1",
    "--no-spliced-alignment": "",
    "--rdg": "10000,10000",
    "--rfg": "10000,10000",
}
hs = mapping.Hisat2(
    hisat2_index="/home/usingh/work/urmi/hoap/test/yeastInd2/index22",
    **hsOpts)
#hsbArgs={"-p":"8","-a":"","-q":""}
#if hs.buildHisat2Index("/home/usingh/work/urmi/hoap/test/yeastInd2","index22","/home/usingh/work/urmi/hoap/test/hisatYeast/S288C_reference_genome_R64-2-1_20150113/S288C_reference_sequence_R64-2-1_20150113.fsa",**hsbArgs):
#    print("Success")

# Align the SRA object's reads with per-call options.
alignOpts = {"--dta-cufflinks": "", "-p": "8"}
sam = hs.perform_alignment(sraOb, **alignOpts)

# Convert the alignment to a sorted bam, removing intermediate files.
samtoolsOpts = {"-@": "8"}
samOb = tools.Samtools(**samtoolsOpts)
bam = samOb.sam_sorted_bam(sam, delete_sam=True, delete_bam=True)
#bt2=mapping.Bowtie2("/home/usingh/work/urmi/hoap/test/bowtieIndex/rRNAindex")
#bt2.performAlignment(sraOb)

# Assemble transcripts from the sorted bam.
st = assembly.Stringtie()
g1 = st.perform_assembly(bam, objectid="myob")
#gtfs=(g1,)
#test stmerge
#merged=st.performStringtieMerge(g1,g1,outFileSuffix="_stOUT",overwrite=True)
#if not merged: