Пример #1
0
    def perform_alignment(self,
                          sra_object,
                          out_suffix="_bowtie2",
                          out_dir="",
                          objectid="NA"):
        """Align the reads referenced by ``sra_object`` and sort the result.

        Parameters
        ----------

        sra_object: SRA object
            An object of type SRA. Its ``fastq_path`` (and ``fastq2_path``
            when ``layout`` is ``'PAIRED'``), ``srr_accession`` and
            ``directory`` attributes are read.
        out_suffix: string
            Suffix appended to the SRR accession to name the output sam file.
        out_dir: string
            Directory to save the results. Default value is
            ``sra_object.directory``. When given, ``pu.mkdir`` is called on it
            if ``pu.check_paths_exist`` reports it missing.
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession.
            This is useful for debugging, benchmarking and reports. When left
            empty or at the default ``"NA"``, ``sra_object.srr_accession`` is
            used.

        :return: The value returned by ``tools.Samtools().sam_sorted_bam`` for
            the sam file (presumably the sorted bam path), or an empty string
            when ``self.run`` reports failure or the sam file is missing
            outside a dry run.
        :rtype: string
        """
        if not out_dir:
            out_dir = sra_object.directory
        elif not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)

        # Honour a caller-supplied id; the placeholder default falls back to
        # the accession so existing callers see no change.
        if not objectid or objectid == "NA":
            objectid = sra_object.srr_accession

        # Path of the sam file the aligner is asked to write.
        out_sam_file = os.path.join(
            out_dir, sra_object.srr_accession + out_suffix + ".sam")

        # Choose the input options from the library layout.
        if sra_object.layout == 'PAIRED':
            internal_kwargs = {
                "-1": sra_object.fastq_path,
                "-2": sra_object.fastq2_path,
                "-S": out_sam_file
            }
        else:
            internal_kwargs = {"-U": sra_object.fastq_path, "-S": out_sam_file}

        status = self.run(None,
                          objectid=objectid,
                          target=out_sam_file,
                          **internal_kwargs)

        if not status:
            return ""

        # A missing sam file is only tolerated during a dry run.
        if not pu.check_files_exist(out_sam_file) and not _dryrun:
            return ""

        # Convert to bam before returning.
        return tools.Samtools().sam_sorted_bam(out_sam_file)
Пример #2
0
def test_samtools():
    """Check sam-to-sorted-bam conversion and bam merging with ``tools.Samtools``."""
    samtools_runner = tools.Samtools()

    # Conversion: the reported output path must exist afterwards.
    sorted_bam_path = samtools_runner.sam_sorted_bam(
        testVars.hisatSam,
        out_dir=testVars.testDir,
    )
    print("check:" + sorted_bam_path)
    conversion_ok = pu.check_files_exist(sorted_bam_path)
    assert conversion_ok == True, "Failed to convert sam to sorted bam"

    # Merge: "-f" is forwarded as an extra option with an empty value.
    merge_flags = {"-f": ""}
    merged_bam_path = samtools_runner.merge_bam(
        testVars.hisatSortedBam,
        testVars.starSortedBam,
        out_dir=testVars.testDir,
        **merge_flags
    )
    merge_ok = pu.check_files_exist(merged_bam_path)
    assert merge_ok == True, "Failed to merge bam"
Пример #3
0
def test_pipeline1():
    """Run one SRA accession through download, QC, mapping, assembly and quant.

    The steps run in order and each is asserted before the next starts:
    SRA download, fasterq-dump, BBmap and Trimgalore QC, Bowtie2 and Hisat2
    index build plus alignment, Samtools sam-to-sorted-bam, Stringtie and
    Trinity assembly, then Kallisto and Salmon index build plus
    quantification.

    Relies on the module-level names ``srr``, ``workingDir``, ``fq1``,
    ``fq2`` and ``testVars``.
    """
    sraOb = sra.SRA(srr, workingDir)
    st = sraOb.download_sra()
    assert st == True, "SRA download failed"

    st = sraOb.run_fasterqdump(delete_sra=False,
                               **{
                                   "-e": "8",
                                   "-f": "",
                                   "-t": workingDir
                               })
    assert st == True, "fqdump failed"

    # QC with BBmap
    bbdOpts = {
        "ktrim": "r",
        "k": "23",
        "mink": "11",
        "qtrim": "'rl'",
        "trimq": "10",
        "--": ("-Xmx2g", ),
        "ref": testVars.bbdukAdapters
    }
    bbdOb = qc.BBmap(**bbdOpts)
    st = sraOb.perform_qc(bbdOb)
    assert st == True, "bbduk failed"

    # QC with Trimgalore
    tgOpts = {
        "--cores": "10",
        "-o": testVars.testDir,
        "--paired": "",
        "--": (fq1, fq2)
    }
    tg = qc.Trimgalore(**tgOpts)
    st = sraOb.perform_qc(tg)
    assert st == True, "tg failed"

    # Bowtie2: an empty index must be reported as missing before building one
    bt = mapping.Bowtie2(bowtie2_index="")
    assert bt.check_index() == False, "Failed bowtie2 check_index"
    st = bt.build_index(testVars.testDir + "/btIndex", "bowtieIndex",
                        testVars.genome)
    assert st == True, "Failed to build bowtie2 index"
    st = bt.perform_alignment(sraOb)
    assert os.path.isfile(st) == True, "bowtie failed"

    # Hisat2: build the index, then align using the SRA object
    hsOpts = {"--dta-cufflinks": "", "-p": "8"}
    hs = mapping.Hisat2(hisat2_index="", **hsOpts)
    st = hs.build_index(testVars.testDir, "hisatindex", testVars.genome)
    assert st == True, "Failed to build hisat2 index"
    st = hs.perform_alignment(sraOb)
    assert os.path.isfile(st) == True, "hisat failed"

    # Samtools: sort the Hisat2 output, keeping the intermediate files
    hisatSam = st
    samOb = tools.Samtools(**{"-@": "8"})
    bam = samOb.sam_sorted_bam(hisatSam, delete_sam=False, delete_bam=False)
    assert os.path.isfile(bam) == True, "sam to bam failed"

    # Assembly with Stringtie on the sorted bam
    stie = assembly.Stringtie(reference_gtf=testVars.gtf)
    result = stie.perform_assembly(bam, out_dir=testVars.testDir)
    assert pu.check_files_exist(result) == True, "Failed stringtie"

    # Assembly with Trinity on the SRA object
    tr = assembly.Trinity()
    tr_out = tr.perform_assembly(sraOb, verbose=True)
    # The message names Trinity so a failure here is not mistaken for the
    # Stringtie step above.
    assert pu.check_files_exist(tr_out) == True, "Failed trinity"

    # Kallisto: an empty index must be reported as missing before building one
    kl = quant.Kallisto(kallisto_index="")
    assert kl.check_index() == False, "Failed kallisto check_index"
    st = kl.build_index(index_path=testVars.testDir + "/kallistoIndex",
                        index_name="kalIndex",
                        fasta=testVars.cdna)
    assert st == True, "Failed to build kallisto index"
    st = kl.perform_quant(sraOb)
    assert os.path.isdir(st) == True, "Failed to run kallisto"

    # Salmon: same sequence as Kallisto
    sl = quant.Salmon(salmon_index="")
    assert sl.check_index() == False, "Failed salmon check_index"
    st = sl.build_index(index_path=testVars.testDir + "/salmonIndex",
                        index_name="salIndex",
                        fasta=testVars.cdna)
    assert st == True, "Failed to build salmon index"

    st = sl.perform_quant(sraOb)
    assert os.path.isdir(st) == True, "Failed to run salmon"
Пример #4
0
# Build the Hisat2 index and report whether it is available.
if hs.build_index(workingDir + "/maizeIndex", "maizeInd", GENOME,
                  **hisat2_buildArgs):
    print("Indexing done.")

if hs.check_index():
    print("Index {} exists".format(hs.hisat2_index))

# Align every SRA object. The accession is recorded next to each successful
# output so later steps stay matched to the right run even when some
# alignments fail and are skipped.
samList = []
samAccessions = []
for ob in sraObjects:
    print("Processing {}...".format(ob.srr_accession))
    thisSam = hs.perform_alignment(ob, **{"-p": "16"})
    if thisSam:
        samList.append(thisSam)
        samAccessions.append(ob.srr_accession)
print("Alignment done!! Sam files:" + ",".join(samList))

# Convert each alignment to a sorted bam, tagging the command with the
# accession that produced it.
samOb = tools.Samtools(**{"-@": "16"})
bamList = []
for sam, accession in zip(samList, samAccessions):
    print("Processing:" + sam)
    thisBam = samOb.sam_sorted_bam(sam,
                                   delete_sam=True,
                                   delete_bam=True,
                                   objectid=accession)
    if thisBam:
        bamList.append(thisBam)
print("Sorted bam files:" + ",".join(bamList))

st = assembly.Stringtie()
gtfList = []
Пример #5
0
dm.build_index(infa,"diamondDB",out_dir="/Users/usingh/work/urmi/tests/mikado/dout",threads=8)


listFile="/Users/usingh/work/urmi/tests/mikado/list.txt"
genome="/Users/usingh/work/urmi/tests/mikado/chr5.fas"
mode="permissive"
scoring="plants.yaml"
junctions="/Users/usingh/work/urmi/tests/mikado/junctions.bed"

mk=tools.Mikado()
mk.runMikadoFull(listFile,genome,mode,scoring,junctions,"mkconf",infa,dm,8,out_dir="/Users/usingh/work/urmi/tests/mikado/pyrout",verbose=False)
"""
# new tests
# Samtools: convert a sam file to bam, then sort that bam.
sam = testDir + "/test_files/athaliana/mapping/hisat2.sam"
sm = tools.Samtools(threads=5)

# Options for the conversion; "-@" is forwarded as an extra raw option.
sam_to_bam_args = {
    "out_suffix": "test2",
    "threads": 3,
    "delete_sam": False,
    "verbose": True,
    "quiet": False,
    "logs": True,
    "objectid": "NA",
    "-@": "4",
}
bam1 = sm.sam_to_bam(sam, **sam_to_bam_args)
print(bam1)

sort_bam_args = {
    "out_suffix": "test2",
    "threads": 2,
    "delete_bam": False,
    "verbose": True,
    "quiet": False,
    "logs": True,
    "objectid": "NA",
}
bam2 = sm.sort_bam(bam1, **sort_bam_args)
print(bam2)
#bam=sm.sam_sorted_bam(sam,delete_sam=False,delete_bam=False)

# Transdecoder: run the long-ORFs step, then feed its output directory to
# the predict step.
txd = tools.Transdecoder()
infa = "/Users/usingh/work/urmi/tests/txd/test.fa"
outdir = txd.run_transdecoder_longorfs(
    infa,
    out_dir="/Users/usingh/work/urmi/tests/txd/mtout1",
)
print(outdir)

poutdir = "/Users/usingh/work/urmi/tests/txd/mypredout"
predout = txd.run_transdecoder_predict(
    infa,
    longorfs_dir=outdir,
    out_dir=poutdir,
)
print(predout)
Пример #6
0
    "mink": "11",
    "qtrim": "'rl'",
    "trimq": "10",
    "ref": testVars.bbdukAdapters
}
# Shared tool objects used by the pipeline tests in this module.

# Quality control
bbdOb = qc.BBmap(None, **bbdOpts)
tg = qc.Trimgalore()

# Aligners, each given an index location and the genome
btIndexPath = testVars.testDir + "/btIndex"
bt = mapping.Bowtie2(index=btIndexPath, genome=testVars.genome)

hsOpts = {"--dta-cufflinks": "", "-p": "8"}
hsIndexPath = testVars.testDir + "/hisatindex"
hs = mapping.Hisat2(index=hsIndexPath, genome=testVars.genome, **hsOpts)

starIndexPath = os.path.join(testVars.testDir, "starIndex")
star = mapping.Star(index=starIndexPath, genome=testVars.genome)

# Post-processing and assembly
samOb = tools.Samtools()
stie = assembly.Stringtie()

# Quantification tools, each given an index location and a transcriptome
klIndexPath = testVars.testDir + "/kallistoIndex/kalIndex"
kl = quant.Kallisto(index=klIndexPath, transcriptome=testVars.cdna)

slIndexPath = testVars.testDir + "/salmonIndex/salIndex"
sl = quant.Salmon(index=slIndexPath, transcriptome=testVars.cdna_big)

# SRA object shared by the tests; fastq_exists() must report True.
sraOb = sra.SRA(srr, workingDir)
st = sraOb.fastq_exists()
assert st == True, "fasterq-dump failed"


def test_pipeline1():
    """Chain trim -> align -> assemble -> quant on the shared SRA object.

    Uses the module-level ``sraOb``, ``bbdOb``, ``hs``, ``stie`` and ``kl``
    objects and passes when the final call in the chain returns something
    other than ``None``.
    """
    st = sraOb.trim(bbdOb).align(hs).assemble(stie).quant(kl)
    # Identity comparison: a result object with a custom __ne__ cannot
    # influence the check.
    assert st is not None, "pipeline 1 failed"
Пример #7
0
#sraOb.localfastqPath=unMappedReads


# Configure Hisat2 with an index path and its alignment options.
hsOpts = {
    "--dta-cufflinks": "",
    "-p": "12",
    "--mp": "1,1",
    "--no-spliced-alignment": "",
    "--rdg": "10000,10000",
    "--rfg": "10000,10000",
}
hs = mapping.Hisat2(
    hisat2_index="/home/usingh/work/urmi/hoap/test/yeastInd2/index22",
    **hsOpts
)
# Disabled index build, kept for reference:
#hsbArgs={"-p":"8","-a":"","-q":""}
#if hs.buildHisat2Index("/home/usingh/work/urmi/hoap/test/yeastInd2","index22","/home/usingh/work/urmi/hoap/test/hisatYeast/S288C_reference_genome_R64-2-1_20150113/S288C_reference_sequence_R64-2-1_20150113.fsa",**hsbArgs):
#    print("Success")

# Align the SRA object; these per-call options are passed on top of hsOpts.
alignOpts = {"--dta-cufflinks": "", "-p": "8"}
sam = hs.perform_alignment(sraOb, **alignOpts)

# Produce the sorted bam, asking Samtools to delete the intermediates.
samtoolsOpts = {"-@": "8"}
samOb = tools.Samtools(**samtoolsOpts)
bam = samOb.sam_sorted_bam(sam, delete_sam=True, delete_bam=True)


# Disabled Bowtie2 run, kept for reference:
#bt2=mapping.Bowtie2("/home/usingh/work/urmi/hoap/test/bowtieIndex/rRNAindex")
#bt2.performAlignment(sraOb)

# Assemble the sorted bam with Stringtie.
st = assembly.Stringtie()
g1 = st.perform_assembly(bam, objectid="myob")


#gtfs=(g1,)
#test stmerge
#merged=st.performStringtieMerge(g1,g1,outFileSuffix="_stOUT",overwrite=True)
#if not merged: