# Read-processing pipeline: optional trimming -> alignment -> BAM sorting ->
# htseq counting -> symbol/ID conversion, bracketed by Message() calls.
# A failure in any step propagates as-is; nothing here catches exceptions.

# Settings taken from the parameter mapping.
output_path = param['htseqOutPath']
db_name = param['gsnapDbName']
gsnap_annotation = param['gsnapAnnotation']
Dict = param['symbolIDFile']
inputpath = file_path

# Step 0: report the start, then make the data directory the working dir.
Message(startMessage, email)
os.chdir(file_path)

# Step 1: collect the input files. Trimming runs only when the setting is
# the literal string 'True' (a string comparison, not a boolean).
fastqFiles = list_files(file_path)
if trim == 'True':
    fastqFiles = Trimmomatic(trimmomatic, fastqFiles, phred, trimmoAdapter)
# print('...') with one string prints the same text under the Python 2
# print statement and is also valid Python 3 syntax.
print('list file succeed')

# Step 2: align. Any aligner value other than 'gsnap' selects STAR.
if aligner != 'gsnap':
    map_files = STAR(fastqFiles, db_path, thread)
else:
    map_files = gsnap(fastqFiles, db_path, db_name, gsnap_annotation, thread)
print('align succeed')

# Step 3: convert the alignments to sorted BAM files.
sorted_bam = sam2bam_sort(map_files, thread)
print('sorted succeed')

# Step 4: htseq counting. file_path is passed as the third argument
# (presumably the output location -- confirm against htseq_count).
htseq_count(sorted_bam, annotation, file_path)
print('htseq count succeed')

# Step 5: symbol/ID conversion using the symbolIDFile setting.
ID_Convert(Dict, output_path, inputpath)
print('id convert succeed')

# Report completion.
Message(endMessage, email)
# Pipeline driver: trim (optional) -> align -> sort -> htseq count ->
# symbol/ID conversion. Message() is called once before and once after.
# No step is wrapped in error handling; exceptions propagate to the caller.

# --- configuration read from the parameter mapping ---
output_path = param['htseqOutPath']
db_name = param['gsnapDbName']
gsnap_annotation = param['gsnapAnnotation']
Dict = param['symbolIDFile']
inputpath = file_path

# --- step 0: announce the run and switch into the data directory ---
Message(startMessage, email)
os.chdir(file_path)

# --- step 1: list inputs, trimming them when trim is the string 'True' ---
fastqFiles = list_files(file_path)
if trim == 'True':
    fastqFiles = Trimmomatic(trimmomatic, fastqFiles, phred, trimmoAdapter)
# Single-argument print('...') emits the same text under the Python 2
# print statement and also parses on Python 3.
print('list file succeed')

# --- step 2: mapping with gsnap, or STAR for every other aligner value ---
map_files = (
    gsnap(fastqFiles, db_path, db_name, gsnap_annotation, thread)
    if aligner == 'gsnap'
    else STAR(fastqFiles, db_path, thread)
)
print('align succeed')

# --- step 3: SAM -> sorted BAM ---
sorted_bam = sam2bam_sort(map_files, thread)
print('sorted succeed')

# --- step 4: htseq counting (third argument is file_path; presumably the
# output location -- confirm against htseq_count) ---
htseq_count(sorted_bam, annotation, file_path)
print('htseq count succeed')

# --- step 5: symbol/ID conversion driven by the symbolIDFile setting ---
ID_Convert(Dict, output_path, inputpath)
print('id convert succeed')

# --- done: send the closing message ---
Message(endMessage, email)
except: print 'host align failed' Message('host align failed',email) raise #======== (3) sam to bam and sort ================================ try: sorted_bams = sam2bam_sort(map_files,thread) # [file.sort.bam] print 'host sorted succeed' print 'sorted_bam is: ',sorted_bams except: print 'host sorted failed' Message('host sorted failed',email) raise #======== (4) get htseq Count to host ============================ try: htseq_count(sorted_bams,host_annotation,host_htseqFolder,host_AnnotationSource) print 'host htseqCount succeed' except: print 'host htseq count failed' Message('host htseq count failed',email) raise #======== (5) extract unmapped reads ============================= try: unmap2host_bams = extract_bam(sorted_bams,'unmap',seqType,thread) # [file.sort.unmap.bam] print 'extract unmap2host_bams succeed' print 'unmap2host_bams is: ',unmap2host_bams remove(sorted_bams) # rename files for f in unmap2host_bams: os.rename(f,f[:-4]+'2host.bam') unmap2host_bams = [f[:-4]+'2host.bam' for f in unmap2host_bams] # [file.sort.unmap2host.bam] except:
print 'sorted_bam is: ',sorted_bams except: print 'sorted failed' Message('sorted failed',email) raise #=========== (4) get mapping stats ================== try: flagstat(sorted_bams) print 'flagstat succeed' except: print 'flagstat failed' Message('flagstat failed',email) raise #=========== (4) htseq_count ======================== try: htseq_count(sorted_bams,annotation,output_path,dataSource,htseqBatch) print 'htseq count succeed' except: print 'htseq count failed' Message('htseq count failed',email) raise #=========== (5) htseq symbol to id ================= if dataSource == 'ncbi': try: geneSymbol2EntrezID(Dict,output_path,output_path) print 'id convert succeed' except: print 'id convert failed' Message('id convert failed',email) raise Message(endMessage,email)