# Fragment of a pipeline body (enclosing definition not visible here):
# reports the trimmed FASTQ list, then runs alignment (bwa_vari),
# sam -> sorted bam conversion (sam2bam_sort) and duplicate marking
# (markduplicates). Each visible step prints a success line plus its result;
# on any exception it prints a failure line and calls Message(<text>, email).
# NOTE(review): line breaks/indentation were reconstructed from a
# whitespace-collapsed source -- confirm against the original file.
print 'list file succeed'
print 'fastqFiles is: ',trim_fastqFiles
#======== (2) define group ===============================
#defined above
#======== (3) align using bwa ============================
try:
    # Align the trimmed FASTQ files; the result is handed to step (4).
    map_sam = bwa_vari(read_group,trim_fastqFiles,bwaIndex,thread)
    print 'align succeed'
    print 'map_sam is: ',map_sam
except:
    print 'align failed'
    Message('align failed',email)
    # Re-raise so the failure still propagates after being reported.
    raise
#======== (4) Convert sam to sorted bam ==================
try:
    sort_bams = sam2bam_sort(map_sam,thread)
    print 'sort bam files succeed'
    print 'sort_bams is: ',sort_bams
except:
    print 'sort bam files failed'
    Message('sort bam files failed',email)
    raise
#======== (5) Markduplicates using picard ================
try:
    dedup_files = markduplicates(picard,sort_bams)
    print 'mark duplicates succeed'
    print 'dedup_files is: ',dedup_files
    # Called only after markduplicates returned; presumably deletes the
    # intermediate sorted bam files -- confirm against remove()'s definition.
    remove(sort_bams)
except:
    print 'mark duplicates failed'
    Message('mark duplicates failed',email)
    # NOTE(review): unlike steps (3) and (4) no `raise` is visible here; the
    # fragment may simply be truncated at this point -- confirm.
map_files = gsnap(unmap2host_fq_gzs,virus_alignerDb, virus_gsnapDbName,virus_gsnapAnnotation,thread) else: map_files = STAR(unmap2host_fq_gzs,virus_alignerDb,thread,'',['--outSAMunmapped Within']) # [file.sam] new_map_files = [f[:-3]+'sort.unmap2host.sam' for f in map_files] for f1,f2 in zip(map_files,new_map_files): os.rename(f1,f2) # [file.sort.unmap2host.sam] print 'virus align succeed' print 'map_files is: ',new_map_files # remove(unmap2host_fq_gzs) except: print 'virus align failed' Message('virus align failed',email) raise #======== (2) sam to bam and sort ================================ try: sorted_bams = sam2bam_sort(new_map_files,thread) # [file.sort.bam] [file.sort.unmap2host.sort.bam] print 'virus sorted succeed' print 'sorted_bams is: ',sorted_bams except: print 'virus sorted failed' Message('virus sorted failed',email) raise #======== (3) extract reads that mapped and unmapped to virus ============================ try: map2virus_bams = extract_bam(sorted_bams,'map',seqType,thread) # [file.sort.unmap2host.sort.map.bam] unmap2virus_bams = extract_bam(sorted_bams,'unmap',seqType,thread) # [file.sort.unmap2host.sort.unmap.bam] # rename files for f in map2virus_bams: os.rename(f,f[:-29]+'.only2virus.bam') map2virus_bams = [f[:-29]+'.only2virus.bam' for f in map2virus_bams] # [file.only2virus.bam] for f in unmap2virus_bams: os.rename(f,f[:-31]+'.map2neither.bam') # [file.map2neither.bam] unmap2virus_bams = [f[:-31]+'.map2neither.bam' for f in unmap2virus_bams]
# Fragment of a pipeline body (enclosing definition not visible here):
# reports the trimmed FASTQ list, then runs alignment (bwa_vari),
# sam -> sorted bam conversion (sam2bam_sort) and duplicate marking
# (markduplicates). Each visible step prints a success line plus its result;
# on any exception it prints a failure line and calls Message(<text>, email).
# NOTE(review): line breaks/indentation were reconstructed from a
# whitespace-collapsed source -- confirm against the original file.
print 'list file succeed'
print 'fastqFiles is: ', trim_fastqFiles
#======== (2) define group ===============================
#defined above
#======== (3) align using bwa ============================
try:
    # Align the trimmed FASTQ files; the result is handed to step (4).
    map_sam = bwa_vari(read_group, trim_fastqFiles, bwaIndex, thread)
    print 'align succeed'
    print 'map_sam is: ', map_sam
except:
    print 'align failed'
    Message('align failed', email)
    # Re-raise so the failure still propagates after being reported.
    raise
#======== (4) Convert sam to sorted bam ==================
try:
    sort_bams = sam2bam_sort(map_sam, thread)
    print 'sort bam files succeed'
    print 'sort_bams is: ', sort_bams
except:
    print 'sort bam files failed'
    Message('sort bam files failed', email)
    raise
#======== (5) Markduplicates using picard ================
try:
    dedup_files = markduplicates(picard, sort_bams)
    print 'mark duplicates succeed'
    print 'dedup_files is: ', dedup_files
    # Called only after markduplicates returned; presumably deletes the
    # intermediate sorted bam files -- confirm against remove()'s definition.
    remove(sort_bams)
except:
    print 'mark duplicates failed'
    Message('mark duplicates failed', email)
    # NOTE(review): unlike steps (3) and (4) no `raise` is visible here; the
    # fragment may simply be truncated at this point -- confirm.
# Fragment of a pipeline body (enclosing definition and earlier parameter
# reads are not visible here). Visible flow: read remaining settings from
# `param`, change into `file_path`, list (and optionally trim) the FASTQ
# files, align with gsnap or STAR, sort, run htseq_count, then ID_Convert.
# Message() is called with startMessage before the work and endMessage after.
# NOTE(review): line breaks/indentation were reconstructed from a
# whitespace-collapsed source -- confirm against the original file.
output_path = param['htseqOutPath']
db_name = param['gsnapDbName']
gsnap_annotation = param['gsnapAnnotation']
Dict = param['symbolIDFile']
# The same directory is used as the working dir and as ID_Convert's last argument.
inputpath = file_path
#=========== (0) enter the directory ================
Message(startMessage, email)
os.chdir(file_path)
#=========== (1) reads files and trim ===============
fastqFiles = list_files(file_path)
# `trim` is compared against the string 'True', not the boolean True.
if trim == 'True':
    fastqFiles = Trimmomatic(trimmomatic, fastqFiles, phred, trimmoAdapter)
# NOTE(review): assumed to sit outside the `if` above -- confirm.
print 'list file succeed'
#=========== (2) run gsnap to do the mapping ========
# Any aligner value other than 'gsnap' falls through to STAR.
if aligner == 'gsnap':
    map_files = gsnap(fastqFiles, db_path, db_name, gsnap_annotation, thread)
else:
    map_files = STAR(fastqFiles, db_path, thread)
# NOTE(review): assumed to sit outside the `else` above -- confirm.
print 'align succeed'
#=========== (3) samtools to sort the file ==========
sorted_bam = sam2bam_sort(map_files, thread)
print 'sorted succeed'
#=========== (4) htseq_count ========================
htseq_count(sorted_bam, annotation, file_path)
print 'htseq count succeed'
#=========== (5) htseq symbol to id =================
ID_Convert(Dict, output_path, inputpath)
print 'id convert succeed'
Message(endMessage, email)
if aligner == 'gsnap': # check index if os.listdir(alignerDb) == []: gsnap_Db(ref_fa,alignerDb,gsnapDbName,gsnapAnnotation) map_files = gsnap(fastqFiles,alignerDb,gsnapDbName,gsnapAnnotation,thread) # [file.sam] else: map_files = STAR(fastqFiles,alignerDb,thread) print 'align succeed' print 'map_files is: ',map_files except: print 'align failed' Message('host align failed',email) raise #======== (3) sam to bam and sort ================================ try: sorted_bams = sam2bam_sort(map_files,thread) # [file.sort.bam] print 'bam sorted succeed' print 'sorted_bam is: ',sorted_bams except: print 'bam sorted failed' Message('bam sorted failed',email) raise """ #======== (2) use gsnap map to bacteria ======== map_files = gsnap(fastqFiles,microDb_path,microDb_name,'',thread) print 'mapping succeed' #======== (3) sam2Bam and sort bam ============= sorted_bam = sam2bam_sort(map_files) print 'sortting succeed' #======== (4) extract mapped reads ============= mapped_files = extract_mapped(sorted_bam)
# check index if os.listdir(alignerDb) == []: gsnap_Db(ref_fa, alignerDb, gsnapDbName, gsnapAnnotation) map_files = gsnap(fastqFiles, alignerDb, gsnapDbName, gsnapAnnotation, thread) # [file.sam] else: map_files = STAR(fastqFiles, alignerDb, thread) print 'align succeed' print 'map_files is: ', map_files except: print 'align failed' Message('host align failed', email) raise #======== (3) sam to bam and sort ================================ try: sorted_bams = sam2bam_sort(map_files, thread) # [file.sort.bam] print 'bam sorted succeed' print 'sorted_bam is: ', sorted_bams except: print 'bam sorted failed' Message('bam sorted failed', email) raise """ #======== (2) use gsnap map to bacteria ======== map_files = gsnap(fastqFiles,microDb_path,microDb_name,'',thread) print 'mapping succeed' #======== (3) sam2Bam and sort bam ============= sorted_bam = sam2bam_sort(map_files) print 'sortting succeed' #======== (4) extract mapped reads ============= mapped_files = extract_mapped(sorted_bam)
# Fragment of a pipeline body (enclosing definition and earlier parameter
# reads are not visible here). Visible flow: read remaining settings from
# `param`, change into `file_path`, list (and optionally trim) the FASTQ
# files, align with gsnap or STAR, sort, run htseq_count, then ID_Convert.
# Message() is called with startMessage before the work and endMessage after.
# NOTE(review): line breaks/indentation were reconstructed from a
# whitespace-collapsed source -- confirm against the original file.
output_path = param['htseqOutPath']
db_name = param['gsnapDbName']
gsnap_annotation = param['gsnapAnnotation']
Dict = param['symbolIDFile']
# The same directory is used as the working dir and as ID_Convert's last argument.
inputpath = file_path
#=========== (0) enter the directory ================
Message(startMessage,email)
os.chdir(file_path)
#=========== (1) reads files and trim ===============
fastqFiles = list_files(file_path)
# `trim` is compared against the string 'True', not the boolean True.
if trim == 'True':
    fastqFiles = Trimmomatic(trimmomatic,fastqFiles,phred,trimmoAdapter)
# NOTE(review): assumed to sit outside the `if` above -- confirm.
print 'list file succeed'
#=========== (2) run gsnap to do the mapping ========
# Any aligner value other than 'gsnap' falls through to STAR.
if aligner == 'gsnap':
    map_files = gsnap(fastqFiles,db_path, db_name,gsnap_annotation,thread)
else:
    map_files = STAR(fastqFiles,db_path,thread)
# NOTE(review): assumed to sit outside the `else` above -- confirm.
print 'align succeed'
#=========== (3) samtools to sort the file ==========
sorted_bam = sam2bam_sort(map_files,thread)
print 'sorted succeed'
#=========== (4) htseq_count ========================
htseq_count(sorted_bam,annotation,file_path)
print 'htseq count succeed'
#=========== (5) htseq symbol to id =================
ID_Convert(Dict,output_path,inputpath)
print 'id convert succeed'
Message(endMessage,email)
elif aligner == 'STAR': if not os.path.exists(db_path): os.mkdir(db_path) if os.listdir(db_path) == []: STAR_Db(db_path,ref_fa,thread) map_files = STAR(trim_fastqFiles,db_path,thread,annotation,['--outSAMtype BAM SortedByCoordinate','--quantMode GeneCounts']) elif aligner == 'bowtie': map_files = bowtie(trim_fastqFiles,db_path,thread=1,otherParameters=['']) print 'align succeed' print 'map_files is: ',map_files except: print 'align failed' Message('align failed',email) raise #=========== (3) samtools to sort the file ========== try: sorted_bams = sam2bam_sort(map_files,thread,'name') print 'sorted succeed' print 'sorted_bam is: ',sorted_bams except: print 'sorted failed' Message('sorted failed',email) raise #=========== (4) get mapping stats ================== try: flagstat(sorted_bams) print 'flagstat succeed' except: print 'flagstat failed' Message('flagstat failed',email) raise #=========== (4) htseq_count ========================