示例#1
0
def taxonomy_reads_pipeline(db, inputf, outputf, sample):
    """Align a sample's reads with bowtie2 and post-process the SAM output.

    db      -- dataset object; its ``bowtie``, ``taxo``, ``len`` and
               ``taxodb`` attributes are read here
    inputf  -- passed through to the bowtie2 wrapper's ``run``
    outputf -- directory prefix; the SAM file is read from
               ``<outputf>/<sample.name>.sam``
    sample  -- sample object; ``sample.name`` is used as the run tag

    Nothing is returned; the result of ``sam.process`` is not propagated.
    """
    sample_tag = sample.name

    # ---- alignment ------------------------------------------------------
    aligner = root.program('bowtie2', sample)
    # NOTE(review): the value returned by run() is not inspected, so a
    # failed alignment does not stop the post-processing step below.
    aligner.run(db.bowtie, inputf, outputf, sample_tag)

    # ---- post-processing --------------------------------------------------
    # sam.process arguments: alignment file (sam format), the dataset's
    # sequence-id/taxonomy table, the sequence lengths, and the taxonomy db.
    alignment_file = outputf + "/" + sample_tag + ".sam"
    sam.process(alignment_file, db.taxo, db.len, db.taxodb)
示例#2
0
def idbaud(projectid,sampleid,db,protocol,reads1, reads2, good_reads):
    """Assemble one sample and annotate the assembly against ``db``.

    Steps, in order: store the read paths on the sample's row, load the
    sample, run fq2fa and idba_ud (assembly), run prodigal (gene finding),
    then run every annotation branch selected by ``db.name``, ``db.taxo``
    and ``db.func``.  Progress is reported through ``update_status`` /
    ``root.updateStatus``.  Most external steps are skipped when
    ``root.isdir`` reports that their output is already present.

    Returns None.
    """
    store = sql.SQL(root.filedb())
    project_path = store.project(projectid)[0][4]

    # The reads are rewritten on every call so a re-run picks up new inputs.
    row_filter = ' where project_id="' + projectid + '" and sample_id="' + sampleid + '"'
    store.exe('update samples set reads1="' + reads1 + '"' + row_filter)
    store.exe('update samples set reads2="' + reads2 + '"' + row_filter)

    # Load the full sample record and make sure its assembly directory exists.
    rows = store.exe('select * from samples' + row_filter)
    sample = root.samples(rows[0], project_path)
    root.mkdir(sample.assemblyDir)

    # fastq -> fasta conversion, only needed while no assembly output exists.
    assembler = root.program('idba_ud', sample, db)
    update_status(store, sampleid, db.id, protocol, "Preprocessing")
    converter = root.program('fq2fa', sample, db)
    if not root.isdir(assembler.out):
        converter.run()

    # Assembly; intermediate idba_ud files are deleted afterwards.
    update_status(store, sampleid, db.id, protocol, "Assembling")
    assembler = root.program('idba_ud', sample, db)
    if not root.isdir(assembler.out):
        assembler.run()
        os.system(' cd ' + assembler.path + ' &&  rm kmer contig-* align-* graph-* local-contig-* reads.fa')

    # Gene finding over the assembled scaffolds.
    gene_finder = root.program("prodigal", sample, db)
    update_status(store, sampleid, db.id, protocol, "Finding Genes")
    if not root.isdir(gene_finder.output + ".gff"):
        gene_finder.run()

    if db.name == "abcdefghij":
        # Parenthesised form prints the same text under Python 2.
        print("MetaPlAn2")
        update_status(store, sampleid, db.id, protocol, "Processing")
        metaphlan = root.program('MetaPhlAn', sample, db)
        if not root.isdir(metaphlan.out):
            metaphlan.run()
        tree = txp.metaphlan_taxonomy_tree(metaphlan.out)
        results = root.SampleResults(sample, tree, protocol, db.name, "taxonomy", metaphlan.out)
        results.start()
        update_status(store, sampleid, db.id, protocol, "Done")

    if db.name == 'MyTaxa':
        taxa = root.mytaxa(sample, db)
        update_status(store, sampleid, db.id, protocol, "Screening")
        # Each stage runs only when its output (by suffix) is missing.
        stages = (
            (".prot.mytaxa.fa", taxa.pre),
            (".MyTaxa.matches.daa", taxa.align),
            (".MyTaxa.align", taxa.postd),
            (".MyTaxa.input", taxa.mpre),
            (".MyTaxa.out", taxa.run),
        )
        for suffix, run_stage in stages:
            if not root.isdir(taxa.output + suffix):
                run_stage()
        update_status(store, sampleid, db.id, protocol, "Quantification")
        data = taxa.postM()
        tree = txp.mytaxa_taxonomy_tree(data, taxa.output + ".MyTaxa.matches.taxonomy.abundance")
        results = root.SampleResults(sample, tree, protocol, "MyTaxa", "taxonomy", taxa.output + ".MyTaxa.matches")
        results.start()
        update_status(store, sampleid, db.id, protocol, "Done")

    if db.taxo != "none":
        print("taxonomy")
        # Sequence search: diamond_blastp for one specific database, blastn
        # for every other one.  This step is always executed.
        update_status(store, sampleid, db.id, protocol, "Screening")
        aligner_name = 'diamond_blastp' if db.name == "ryaetguxun" else 'blastn'
        aligner = root.program(aligner_name, sample, db)
        aligner.run()

        # Taxonomy abundance from the search output.
        update_status(store, sampleid, db.id, protocol, "Quantification")
        abundance = pb(aligner.out, db.taxo, db.len, db.taxodb, "taxonomy", db.name, "none", good_reads)

        # Build the taxonomy tree and store the results.
        tree = txp.taxonomy_tree(abundance, aligner.out, protocol, "taxonomy", db.name)
        results = root.SampleResults(sample, tree, protocol, db.name, "taxonomy", aligner.out)
        results.start()
        root.updateStatus(store, projectid, sampleid, "done")
        update_status(store, sampleid, db.id, protocol, "Done")

    if db.func != "none":
        print("functional annotation")
        update_status(store, sampleid, db.id, protocol, "Screening")
        out_files = root.result_files(projectid, "function", protocol, sampleid, db.name)

        # Sequence search with diamond_blastp; always executed.
        root.updateStatus(store, projectid, sampleid, "functional annotation")
        aligner = root.program('diamond_blastp', sample, db)
        aligner.run()

        # Functional abundance from the search output.
        update_status(store, sampleid, db.id, protocol, "Quantification")
        abundance = pb(aligner.out, db.func, db.len, db.funcdb, "function", db.name, out_files.GGenes + ".rpkm", good_reads)

        # Store the functional results.
        results = root.SampleResults(sample, 'none', protocol, db.name, "function", aligner.out)
        results.createFuncDb(abundance)
        update_status(store, sampleid, db.id, protocol, "Done")
示例#3
0
def process(projectid, sampleid, db, protocol, reads1, reads2, good_reads):
    """Match one sample's raw reads against ``db`` and store the abundances.

    The read paths are first written to the sample's row, then the sample is
    loaded and one or more of the following branches run:

    * ``db.name == "abcdefghij"``: MetaPhlAn on the reads (falls through to
      the checks below afterwards).
    * ``db.taxo != "none"``: bowtie2 alignment of the paired reads followed
      by taxonomy quantification; returns ``'success'``.
    * ``db.func != "none"``: paired-end merge, fastq->fasta conversion,
      DIAMOND blastx and functional quantification; returns ``'success'``.
      Only reached when the taxonomy branch did not run.

    Each external step is skipped when ``root.isdir`` reports that its output
    already exists.  Returns None when neither the taxonomy nor the function
    branch applies.

    NOTE(review): ``p`` is not defined in this block; it is concatenated
    into the command lines as the thread count, so it is presumably a
    module-level string -- confirm.
    """
    #db=root.dataset(db)
    x = sql.SQL(root.filedb())
    xpath = x.project(projectid)[0][4]

    # NOTE(review): the statements below are assembled by string
    # concatenation; if any of these values can come from untrusted input
    # they should be passed as bound parameters instead (x.exe's API is not
    # visible here, so the calls are left as they were).
    row_filter = (' where project_id="' + projectid + '" and sample_id="' +
                  sampleid + '"')
    x.exe('update samples set reads1="' + reads1 + '"' + row_filter)
    x.exe('update samples set reads2="' + reads2 + '"' + row_filter)
    samples = x.exe('select * from samples' + row_filter)
    sample = root.samples(samples[0], xpath)
    root.mkdir(sample.matchesDir)

    if db.name == "abcdefghij":
        update_status(x, sampleid, db.id, protocol, "Processing")
        metaphlan = root.program('MetaPhlAnR', sample, db)
        if not root.isdir(metaphlan.out):
            metaphlan.run()
        G = txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn = root.SampleResults(sample, G, protocol, db.name, "taxonomy",
                                 metaphlan.out)
        abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")

    if not db.taxo == "none":
        # Align the paired-end reads with bowtie2.
        matches = sample.matchesDir + '/alignment.' + db.id + '.matches'
        # NOTE(review): this status update uses the literal 'matches' where
        # every other call passes ``protocol`` -- kept as found.
        update_status(x, sampleid, db.id, 'matches', "Screening")
        cmd = " ".join([
            root.__ROOTEXEDIR__ + 'bowtie2',
            '--very-fast-local -p ' + p + ' --no-unal --no-hd --no-sq -x',
            db.bowtie, '-1', sample.reads1, '-2', sample.reads2, '-S',
            matches + ' >>', root.log, '2>&1'
        ])
        if not root.isdir(matches):
            os.system(cmd)

        # Parse the SAM output into per-gene read counts and store them.
        update_status(x, sampleid, db.id, protocol, "Quantification")
        if not root.isdir(matches +
                          '.taxonomy.abundance.results.sqlite3.db'):
            abundance = parse_sam(matches, db, good_reads)
            G = txp.taxonomy_tree(abundance, matches, protocol, "taxonomy",
                                  db.id)
            # Store data in the sql TABLE
            abn = root.SampleResults(sample, G, protocol, db.id, "taxonomy",
                                     matches)
            abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")
        return 'success'

    if not db.func == "none":
        fileso = root.result_files(projectid, "function", protocol, sampleid,
                                   db.name)
        merged_fastq = sample.matchesDir + '/merged.reads.fastq'
        merged_fasta = sample.matchesDir + '/merged.reads.fasta'

        # Merge the paired ends.
        update_status(x, sampleid, db.id, protocol, "Merge")
        cmd = " ".join([
            'python',
            root.__ROOTEXEDIR__ + "pairend_join.py -s -p " + p + " -m 8 -o ",
            merged_fastq, sample.reads1, sample.reads2
        ])
        root.flog(cmd)
        if not root.isdir(merged_fastq):
            os.system(cmd)

        # Convert the merged reads to fasta.
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/seqtk seq -a', merged_fastq + ' >',
            merged_fasta
        ])
        if not root.isdir(merged_fasta):
            os.system(cmd)

        # DIAMOND blastx.
        update_status(x, sampleid, db.id, protocol, "Screening")
        # Fixed: the prefix now includes the '/' separator, so the outputs
        # live inside matchesDir and line up with the results-db check below.
        dout = sample.matchesDir + '/alignment.' + db.id
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond blastx --id 60 -p ' + p +
            ' -k 1 -e 1e-5 -d', db.diamond, '-a', dout + '.pre', '-q',
            merged_fasta, '>>', root.log, "2>&1"
        ])
        # Fixed: the archive is written as <dout>.pre.daa (the file that
        # `diamond view` reads next), so that is the path to test; the old
        # check on <dout>.daa never matched and blastx always re-ran.
        if not root.isdir(dout + '.pre.daa'):
            os.system(cmd)
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond view -a', dout + '.pre.daa', '-o',
            dout + '.matches -f tab', ">>", root.log, "2>&1"
        ])
        if not root.isdir(dout + '.matches'):
            os.system(cmd)

        # Parse the DIAMOND output and store the functional abundances.
        update_status(x, sampleid, db.id, protocol, "Quantification")
        if not root.isdir(dout +
                          '.matches.function.abundance.results.sqlite3.db'):
            abundance = pb(dout + '.matches', db.func, db.len, db.funcdb,
                           "function", db.name, fileso.GGenes + ".rpkm",
                           good_reads)
            abn = root.SampleResults(sample, 'none', protocol, db.name,
                                     "function", dout + '.matches')
            abn.createFuncDb(abundance)
        update_status(x, sampleid, db.id, protocol, "Done")

        # Remove the intermediate merged reads; rm's output goes to the log.
        os.system('rm ' + merged_fastq + ' >> ' + root.log + " 2>&1")
        os.system('rm ' + merged_fasta + ' >> ' + root.log + " 2>&1")
        return 'success'