def taxonomy_reads_pipeline(db, inputf, outputf, sample):
    """Align a sample with bowtie2 and post-process the resulting SAM file.

    Args:
        db: dataset object; its ``bowtie``, ``taxo``, ``len`` and ``taxodb``
            attributes are forwarded to the aligner and to ``sam.process``.
        inputf: input location handed to the bowtie2 wrapper.
        outputf: output directory; the alignment is expected at
            ``<outputf>/<sample.name>.sam``.
        sample: sample object; ``sample.name`` is used as the file tag.

    Returns:
        None. The results of both steps are not propagated to the caller.
    """
    sample_tag = sample.name

    # --- Alignment (bowtie2) ---
    aligner = root.program('bowtie2', sample)
    # The status is captured but never inspected, so a failed alignment does
    # not stop the pipeline here.
    exit_status = aligner.run(db.bowtie, inputf, outputf, sample_tag)

    # --- Post-processing ---
    # Arguments: alignment file (SAM format), the dataset taxonomy mapping,
    # the sequence lengths and the dataset taxonomy database.
    sam_path = outputf + "/" + sample_tag + ".sam"
    abundance = sam.process(sam_path, db.taxo, db.len, db.taxodb)
def idbaud(projectid, sampleid, db, protocol, reads1, reads2, good_reads):
    """Run the assembly-based annotation pipeline for one sample.

    The steps run in this order: store the read files on the sample row, load
    the sample, convert reads (fq2fa), assemble (idba_ud), find genes
    (prodigal) and finally annotate against ``db``.  Which annotation
    branches run depends on ``db.name``, ``db.taxo`` and ``db.func``; the
    branches are independent ``if`` statements, so more than one may run.
    Most external steps are guarded by a ``root.isdir`` check on their
    expected output and are skipped when that check succeeds.

    Args:
        projectid: project identifier used to look up the project and sample.
        sampleid: sample identifier used to look up the sample row.
        db: dataset object; ``id``, ``name``, ``taxo``, ``func``, ``len``,
            ``taxodb`` and ``funcdb`` are read here.
        protocol: forwarded to ``update_status`` and to the result objects.
        reads1: value written to the ``reads1`` column of the sample row.
        reads2: value written to the ``reads2`` column of the sample row.
        good_reads: forwarded unchanged to ``pb``.

    Returns:
        None.

    Raises:
        IndexError: if no sample row matches ``projectid`` / ``sampleid``.
    """
    # 1. Open the project database and get the project path.
    x = sql.SQL(root.filedb())
    xpath = x.project(projectid)[0][4]

    # 2. Always refresh the read files stored for the sample, so that a
    #    re-run picks up inputs that may have been modified.
    # NOTE(review): these statements are built by string concatenation; if
    # any of the values can come from an untrusted caller they must be
    # validated upstream (the x.exe API is not visible from here, so it is
    # not known whether it supports bound parameters).
    sample_filter = (' where project_id="' + projectid +
                     '" and sample_id="' + sampleid + '"')
    x.exe('update samples set reads1="' + reads1 + '"' + sample_filter)
    x.exe('update samples set reads2="' + reads2 + '"' + sample_filter)

    # 3. Load the full sample record and wrap it in the samples class.
    rows = x.exe('select * from samples' + sample_filter)
    try:
        record = rows[0]
    except IndexError:
        raise IndexError('no sample "' + sampleid + '" found in project "' +
                         projectid + '"')
    sample = root.samples(record, xpath)
    root.mkdir(sample.assemblyDir)

    # 4.1 Run fq2fa; its output is the input of idba_ud.  The conversion is
    #     only needed when the assembly output is missing.
    idba_ud = root.program('idba_ud', sample, db)
    update_status(x, sampleid, db.id, protocol, "Preprocessing")
    fq2fa = root.program('fq2fa', sample, db)
    if not root.isdir(idba_ud.out):
        fq2fa.run()

    # 4.2 Run idba_ud to assemble the sample.
    update_status(x, sampleid, db.id, protocol, "Assembling")
    idba_ud = root.program('idba_ud', sample, db)
    if not root.isdir(idba_ud.out):
        idba_ud.run()
        # Remove the intermediate files once the assembler has run; the
        # fq2fa/idba_ud steps above only execute under this same guard.
        os.system(' cd ' + idba_ud.path + ' && rm kmer contig-* align-* graph-* local-contig-* reads.fa')

    # 4.3 Run the gene finder over the scaffolds.
    prodigal = root.program("prodigal", sample, db)
    update_status(x, sampleid, db.id, protocol, "Finding Genes")
    if not root.isdir(prodigal.output + ".gff"):
        prodigal.run()

    # 5.a MetaPhlAn profile (selected by dataset name).
    if db.name == "abcdefghij":
        print("MetaPlAn2")
        update_status(x, sampleid, db.id, protocol, "Processing")
        metaphlan = root.program('MetaPhlAn', sample, db)
        if not root.isdir(metaphlan.out):
            metaphlan.run()
        G = txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn = root.SampleResults(sample, G, protocol, db.name, "taxonomy", metaphlan.out)
        abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")

    # 5.b MyTaxa: each stage is skipped when its output check succeeds.
    if db.name == 'MyTaxa':
        taxa = root.mytaxa(sample, db)
        update_status(x, sampleid, db.id, protocol, "Screening")
        if not root.isdir(taxa.output + ".prot.mytaxa.fa"):
            taxa.pre()
        if not root.isdir(taxa.output + ".MyTaxa.matches.daa"):
            taxa.align()
        if not root.isdir(taxa.output + ".MyTaxa.align"):
            taxa.postd()
        if not root.isdir(taxa.output + ".MyTaxa.input"):
            taxa.mpre()
        if not root.isdir(taxa.output + ".MyTaxa.out"):
            taxa.run()
        update_status(x, sampleid, db.id, protocol, "Quantification")
        data = taxa.postM()
        G = txp.mytaxa_taxonomy_tree(data, taxa.output + ".MyTaxa.matches.taxonomy.abundance")
        abn = root.SampleResults(sample, G, protocol, "MyTaxa", "taxonomy", taxa.output + ".MyTaxa.matches")
        abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")

    # 5.c Taxonomy annotation of the assembled genes.
    if db.taxo != "none":
        print("taxonomy")
        # Find matches: diamond blastp for one specific dataset, blastn for
        # every other one.
        update_status(x, sampleid, db.id, protocol, "Screening")
        if db.name == "ryaetguxun":
            blastn = root.program('diamond_blastp', sample, db)
        else:
            blastn = root.program('blastn', sample, db)
        blastn.run()

        # Taxonomy abundance.
        update_status(x, sampleid, db.id, protocol, "Quantification")
        abundance = pb(blastn.out, db.taxo, db.len, db.taxodb, "taxonomy", db.name, "none", good_reads)

        # Build the tree used for visualization and store the results.
        G = txp.taxonomy_tree(abundance, blastn.out, protocol, "taxonomy", db.name)
        abn = root.SampleResults(sample, G, protocol, db.name, "taxonomy", blastn.out)
        abn.start()
        root.updateStatus(x, projectid, sampleid, "done")
        update_status(x, sampleid, db.id, protocol, "Done")

    # 5.d Functional annotation of the assembled genes.
    if db.func != "none":
        print("functional annotation")
        update_status(x, sampleid, db.id, protocol, "Screening")
        fileso = root.result_files(projectid, "function", protocol, sampleid, db.name)

        # Find matches with diamond blastp.
        root.updateStatus(x, projectid, sampleid, "functional annotation")
        blastn = root.program('diamond_blastp', sample, db)
        blastn.run()

        # Functional abundance; the gene .rpkm file is passed along to pb.
        update_status(x, sampleid, db.id, protocol, "Quantification")
        abundance = pb(blastn.out, db.func, db.len, db.funcdb, "function", db.name,
                       fileso.GGenes + ".rpkm", good_reads)

        # Store the functional results.
        abn = root.SampleResults(sample, 'none', protocol, db.name, "function", blastn.out)
        abn.createFuncDb(abundance)
        update_status(x, sampleid, db.id, protocol, "Done")
def process(projectid, sampleid, db, protocol, reads1, reads2, good_reads):
    """Run the read-matching pipeline for one sample against one dataset.

    The read files are first stored on the sample row and the sample is
    loaded.  Then, depending on ``db``:

    * ``db.name == "abcdefghij"``: a MetaPhlAn profile is computed.
    * ``db.taxo != "none"``: paired reads are aligned with bowtie2 and the
      alignment is quantified; the function returns right after this branch,
      so the functional branch is not reached for such a dataset.
    * ``db.func != "none"``: paired reads are merged, converted to FASTA,
      searched with diamond blastx and quantified.

    External steps are skipped when the ``root.isdir`` check on their
    expected output succeeds.  The commands use the module-level name ``p``
    as the value of each tool's ``-p`` option.

    Args:
        projectid: project identifier used to look up the project and sample.
        sampleid: sample identifier used to look up the sample row.
        db: dataset object; ``id``, ``name``, ``taxo``, ``func``, ``len``,
            ``funcdb``, ``bowtie`` and ``diamond`` are read here.
        protocol: forwarded to ``update_status`` and to the result objects.
        reads1: value written to the ``reads1`` column of the sample row.
        reads2: value written to the ``reads2`` column of the sample row.
        good_reads: forwarded unchanged to ``parse_sam`` / ``pb``.

    Returns:
        ``'success'`` once the taxonomy or the functional branch finishes;
        ``None`` when neither of those two branches applies.

    Raises:
        IndexError: if no sample row matches ``projectid`` / ``sampleid``.
    """
    x = sql.SQL(root.filedb())
    xpath = x.project(projectid)[0][4]

    # Always refresh the stored read files so a re-run uses the new inputs.
    # NOTE(review): these statements are built by string concatenation; the
    # values must be validated upstream if they can come from an untrusted
    # caller (the x.exe API is not visible from here).
    sample_filter = (' where project_id="' + projectid +
                     '" and sample_id="' + sampleid + '"')
    x.exe('update samples set reads1="' + reads1 + '"' + sample_filter)
    x.exe('update samples set reads2="' + reads2 + '"' + sample_filter)

    rows = x.exe('select * from samples' + sample_filter)
    try:
        record = rows[0]
    except IndexError:
        raise IndexError('no sample "' + sampleid + '" found in project "' +
                         projectid + '"')
    sample = root.samples(record, xpath)
    root.mkdir(sample.matchesDir)

    # MetaPhlAn profile (selected by dataset name).
    if db.name == "abcdefghij":
        update_status(x, sampleid, db.id, protocol, "Processing")
        metaphlan = root.program('MetaPhlAnR', sample, db)
        if not root.isdir(metaphlan.out):
            metaphlan.run()
        G = txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn = root.SampleResults(sample, G, protocol, db.name, "taxonomy", metaphlan.out)
        abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")

    # Taxonomy: align the paired-end reads with bowtie2.
    if db.taxo != "none":
        matches = sample.matchesDir + '/alignment.' + db.id + '.matches'

        # NOTE(review): this first status update uses the literal 'matches'
        # where every other call passes `protocol` - confirm it is intended.
        update_status(x, sampleid, db.id, 'matches', "Screening")
        cmd = " ".join([
            root.__ROOTEXEDIR__ + 'bowtie2',
            '--very-fast-local -p ' + p + ' --no-unal --no-hd --no-sq -x',
            db.bowtie, '-1', sample.reads1, '-2', sample.reads2,
            '-S', matches, '>>', root.log, '2>&1'
        ])
        if not root.isdir(matches):
            os.system(cmd)

        # Parse the SAM-format output to get genes and reads per gene.
        update_status(x, sampleid, db.id, protocol, "Quantification")
        if not root.isdir(matches + '.taxonomy.abundance.results.sqlite3.db'):
            abundance = parse_sam(matches, db, good_reads)
            G = txp.taxonomy_tree(abundance, matches, protocol, "taxonomy", db.id)
            abn = root.SampleResults(sample, G, protocol, db.id, "taxonomy", matches)
            # Store the data in the SQL table.
            abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")
        return 'success'

    # Function: merge the pairs, translate-search them with diamond blastx.
    if db.func != "none":
        fileso = root.result_files(projectid, "function", protocol, sampleid, db.name)
        merged_fastq = sample.matchesDir + '/merged.reads.fastq'
        merged_fasta = sample.matchesDir + '/merged.reads.fasta'

        # Merge the paired ends.
        update_status(x, sampleid, db.id, protocol, "Merge")
        cmd = " ".join([
            'python',
            root.__ROOTEXEDIR__ + "pairend_join.py -s -p " + p + " -m 8 -o ",
            merged_fastq, sample.reads1, sample.reads2
        ])
        root.flog(cmd)
        if not root.isdir(merged_fastq):
            os.system(cmd)

        # Convert the merged reads to FASTA.
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/seqtk seq -a',
            merged_fastq, '>', merged_fasta
        ])
        if not root.isdir(merged_fasta):
            os.system(cmd)

        # diamond blastx.  The output prefix is joined with os.path.join so
        # it lands inside matchesDir whether or not that path ends with a
        # separator, matching the '/alignment.' paths used for the taxonomy
        # branch.
        update_status(x, sampleid, db.id, protocol, "Screening")
        dout = os.path.join(sample.matchesDir, 'alignment.' + db.id)
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond blastx --id 60 -p ' + p + ' -k 1 -e 1e-5 -d',
            db.diamond, '-a', dout + '.pre', '-q', merged_fasta,
            '>>', root.log, "2>&1"
        ])
        # The archive is written to <dout>.pre.daa (the file the view step
        # below reads), so that is the file whose presence allows a skip.
        if not root.isdir(dout + '.pre.daa'):
            os.system(cmd)

        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond view -a', dout + '.pre.daa',
            '-o', dout + '.matches -f tab', ">>", root.log, "2>&1"
        ])
        if not root.isdir(dout + '.matches'):
            os.system(cmd)

        # Parse the diamond output.
        update_status(x, sampleid, db.id, protocol, "Quantification")
        if not root.isdir(dout + '.matches.function.abundance.results.sqlite3.db'):
            abundance = pb(dout + '.matches', db.func, db.len, db.funcdb,
                           "function", db.name, fileso.GGenes + ".rpkm", good_reads)
            abn = root.SampleResults(sample, 'none', protocol, db.name, "function", dout + '.matches')
            abn.createFuncDb(abundance)
        update_status(x, sampleid, db.id, protocol, "Done")

        # The merged reads are only intermediates; remove them.
        os.system('rm ' + merged_fastq + ' >> ' + root.log + " 2>&1")
        os.system('rm ' + merged_fasta + ' >> ' + root.log + " 2>&1")
        return 'success'