def run(args):
    """
    Alternative module: Use the strategy in Bis-SNP to trim 5' bisulfite conversion failures
    """
    options = args.parse_args()
    if len(options.sam_file) == 0:
        error("Missing the SAM file, use -s or --sam option.")
    else:
        options.sam_file = options.sam_file.split(',')
        for s in options.sam_file:
            if not os.path.isfile(s):
                error("Can't open the SAM file: " + s)
                sys.exit(1)
    if len(options.ref_file) == 0:
        error("Missing the reference genome fasta file, use -r or --ref option.")
    else:
        if not os.path.isfile(options.ref_file):
            error("Can't open the ref file: " + options.ref_file)
    if len(options.samtools) != 0:
        if options.samtools[-1] != '/':
            options.samtools += '/'
    if len(options.name) == 0:
        error("Missing the output file name, use -n or --name option.")
    sam_inf = options.sam_file
    ref_file = options.ref_file
    bsm = options.bsm
    s_path = options.samtools
    name = options.name
    remove_overlap = options.remove_overlap
    filter_dup = options.filter_dup
    p_poisson = options.p_poisson
    gsize = options.gsize
    not_mapping = options.not_mapping
    info("Got all the parameters!")

    # check the input mapping files
    sam_format, read_inf = check.check_mapping_file_flag(sam_inf[0], s_path)
    pre_flag = read_inf.readline().split('\t')[1]
    if 'p' in pre_flag:
        single_on = False
        info("The input mapping files are paired-end sequencing!")
    else:
        single_on = True
        info("The input mapping files are single-end sequencing!")

    loc_dict = {}
    if filter_dup:
        ## if filter_dup is True, the duplicate reads will be assessed and shown in Dup_dis.pdf
        info("The filter_dup option has been set to True.")
        info("Assess the duplicate reads...")
        for sam in sam_inf:
            # check the input mapping files
            sam_format, read_inf = check.check_mapping_file(sam, s_path)
            if single_on:
                for read in read_inf:
                    loc_dict = LI.Loc_single(read, loc_dict, bsm)
            else:
                for read in read_inf:
                    loc_dict = LI.Loc_paired(read, loc_dict, bsm)
        max_cov = DR.duplicate_report(loc_dict, gsize, p_poisson, name)
        info('Got the duplicate read distribution!')

    # get reference information
    ref = GR.get_ref(ref_file)

    trim_position = []
    filter_duplicate_reads = 0
    filter_nonuniform_trim_bp = 0
    filter_nonuniform_trim_bp_CG = 0
    filter_remove_overlap_bp = 0
    filter_not_mapping_reads = 0
    all_reads = 0
    not_mapping_reads = 0
    all_mapping_bp = 0

    ## filter the 5' bisulfite failure
    for sam in sam_inf:
        out_sam = sam[:-4] + '_' + name + '_filter.sam'
        out = open(out_sam, 'w')
        # check the input mapping files
        record_mate = {}
        sam_format, read_inf = check.check_mapping_file_header(sam, s_path)
        for read in read_inf:
            # pass the SAM header through unchanged
            if read.startswith('@'):
                out.write(read)
                continue
            else:
                all_reads += 1  ## record the read number (2013-06-20)
            # Get the read information for trimming.
            # If the read isn't uniquely mapped, extract_information() returns an empty list ([]).
            # In: single unique mapping read  Out: [flag, strand, chr, pos, CIGAR, seq, score]
            # In: paired unique mapping read  Out: [flag, strand, chr, pos1, CIGAR, pos2, insert, seq, score]
            read_info = RI(read, bsm)
            read_info = read_info.extract_information()
            if len(read_info) == 0:
                not_mapping_reads += 1
                if not_mapping:  # keep the non-uniquely mapped (or not properly paired) reads
                    out.write(read)
                else:
                    filter_not_mapping_reads += 1  ## record the not-mapping read number (2013-06-20)
                continue
            if len(loc_dict) > 0:  # --filter_dup has been set to True, so remove duplicate reads
                duplicate, loc_dict = DF(read_info, loc_dict, max_cov, single_on)
            else:
                duplicate = False
            if single_on:
                all_mapping_bp += len(read_info[5])  ## record the mapped basepairs (2013-06-20)
            else:
                all_mapping_bp += len(read_info[7])  ## record the mapped basepairs (2013-06-20)
            record_mate, trim_position, filter_nonuniform_trim_bp_CG, filter_duplicate_reads, filter_remove_overlap_bp = \
                NF.nonuniform_filter(read, out, read_info, ref, remove_overlap, duplicate,
                                     single_on, record_mate, trim_position,
                                     filter_nonuniform_trim_bp_CG, filter_duplicate_reads,
                                     filter_remove_overlap_bp)
        out.close()
        del record_mate

    NR.nonuniform_generator(trim_position, name)
    for i in range(len(trim_position)):
        filter_nonuniform_trim_bp += i * trim_position[i]

    ## produce the filter report
    info('Produce the report file...')
    report_out = open(name + "_BSeQC_nonuniform_filter_report.txt", 'w')
    report_out.write('Total reads: %d\n' % all_reads)
    if single_on:
        report_out.write('Not unique mapping reads: %d(%.2f%s all reads)\n' %
                         (not_mapping_reads, float(not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('Unique mapping reads: %d(%.2f%s all reads)\n' %
                         ((all_reads - not_mapping_reads),
                          float(all_reads - not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('Skip not unique mapping reads: %d(%.2f%s all reads)\n' %
                         (filter_not_mapping_reads,
                          float(filter_not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('In unique mapping reads:\n')
        report_out.write('All unique mapping basepairs: %d\n' % all_mapping_bp)
        report_out.write('Filter duplicate reads: %d(%.2f%s of unique mapping reads)\n' %
                         (filter_duplicate_reads,
                          float(filter_duplicate_reads) / (all_reads - not_mapping_reads) * 100, "%"))
        report_out.write("Filter 5' nonconversion basepairs: %d(%.2f%s of unique mapping basepairs)\n" %
                         (filter_nonuniform_trim_bp,
                          float(filter_nonuniform_trim_bp) / all_mapping_bp * 100, "%"))
        report_out.write("Filter 5' nonconversion CpG basepairs: %d(%.2f%s of unique mapping basepairs)\n" %
                         (filter_nonuniform_trim_bp_CG,
                          float(filter_nonuniform_trim_bp_CG) / all_mapping_bp * 100, "%"))
    else:
        report_out.write('Not unique paired mapping reads: %d(%.2f%s)\n' %
                         (not_mapping_reads, float(not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('Unique paired mapping reads: %d(%.2f%s)\n' %
                         ((all_reads - not_mapping_reads),
                          float(all_reads - not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('Skip not paired unique mapping reads: %d(%.2f%s)\n' %
                         (filter_not_mapping_reads,
                          float(filter_not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('In unique paired mapping reads:\n')
        report_out.write('All unique paired mapping basepairs: %d\n' % all_mapping_bp)
        # fixed a misplaced parenthesis here: the percentage is relative to the
        # unique paired mapping reads, matching the single-end branch above
        report_out.write('Filter duplicate reads: %d(%.2f%s of unique paired mapping reads)\n' %
                         (filter_duplicate_reads,
                          float(filter_duplicate_reads) / (all_reads - not_mapping_reads) * 100, "%"))
        report_out.write("Filter 5' nonconversion basepairs: %d(%.2f%s of unique mapping basepairs)\n" %
                         (filter_nonuniform_trim_bp,
                          float(filter_nonuniform_trim_bp) / all_mapping_bp * 100, "%"))
        report_out.write("Filter 5' nonconversion CpG basepairs: %d(%.2f%s of unique mapping basepairs)\n" %
                         (filter_nonuniform_trim_bp_CG,
                          float(filter_nonuniform_trim_bp_CG) / all_mapping_bp * 100, "%"))
        report_out.write('Filter overlapped basepairs: %d(%.2f%s of unique paired mapping basepairs)\n' %
                         (filter_remove_overlap_bp,
                          float(filter_remove_overlap_bp) / all_mapping_bp * 100, "%"))
    report_out.close()
    info('Got the report file!')
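# --- Hedged usage sketch (illustrative, not part of the original module) ---
# run() expects an argparse-like object whose parse_args() yields the attributes
# read above (sam_file, ref_file, bsm, samtools, name, remove_overlap,
# filter_dup, p_poisson, gsize, not_mapping). A minimal parser satisfying that
# contract might look like this; the option strings and defaults below are
# assumptions for illustration, not the tool's documented interface.
def _example_parser():
    import argparse
    parser = argparse.ArgumentParser(
        description="Trim 5' bisulfite conversion failures (Bis-SNP strategy)")
    parser.add_argument('-s', '--sam', dest='sam_file', default='',
                        help='comma-separated list of input SAM files')
    parser.add_argument('-r', '--ref', dest='ref_file', default='',
                        help='reference genome fasta file')
    parser.add_argument('-b', '--bsm', dest='bsm', default='bismark',
                        help='bisulfite mapper that produced the SAM files (assumed default)')
    parser.add_argument('--samtools', dest='samtools', default='',
                        help='path to the samtools directory, if not on PATH')
    parser.add_argument('-n', '--name', dest='name', default='',
                        help='prefix for output files')
    parser.add_argument('--remove_overlap', action='store_true',
                        help='trim the overlapping part of read mates (paired-end only)')
    parser.add_argument('--filter_dup', action='store_true',
                        help='assess and remove duplicate reads')
    parser.add_argument('--p_poisson', type=float, default=1e-5,
                        help='Poisson p-value cutoff for the duplicate coverage limit (assumed default)')
    parser.add_argument('--gsize', type=float, default=2.7e9,
                        help='effective genome size (assumed default)')
    parser.add_argument('--not_mapping', action='store_true',
                        help='keep reads that are not uniquely mapped')
    return parser
# run(_example_parser()) would then validate the options and write
# <sam>_<name>_filter.sam plus <name>_BSeQC_nonuniform_filter_report.txt.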
def filter_sam(sam_inf, strand_t, read_l, single_on, name, s_path, auto,
               remove_overlap, loc_dict, max_cov, not_mapping):
    '''
    Trim the mapping files with the biased positions of every length in every strand,
    which are saved in the variable strand_t.
    '''
    filter_duplicate_reads = 0
    filter_mbias_trim_bp = 0
    filter_remove_overlap_bp = 0
    filter_not_mapping_reads = 0
    all_reads = 0
    not_mapping_reads = 0
    all_mapping_bp = 0
    for sam in sam_inf:
        out_sam = sam[:-4] + '_' + name + '_filter.sam'
        out = open(out_sam, 'w')
        # check the input mapping files
        sam_format, read_inf = check.check_mapping_file_header(sam, s_path)
        # scan every read for QC filtering
        for read in read_inf:
            # pass the SAM header through unchanged
            if read.startswith('@'):
                out.write(read)
                continue
            else:
                all_reads += 1  ## record the read number (2013-06-20)
            # Get the read information for trimming.
            # If the read isn't uniquely mapped, extract_information() returns an empty list ([]).
            # In: single unique mapping read  Out: [flag, strand, chr, pos, CIGAR, seq, score]
            # In: paired unique mapping read  Out: [flag, strand, chr, pos1, CIGAR, pos2, insert, seq, score]
            read_info = RI(read)
            read_info = read_info.extract_information()
            if len(read_info) == 0:
                not_mapping_reads += 1
                if not_mapping:  # keep the non-uniquely mapped reads
                    out.write(read)
                else:
                    filter_not_mapping_reads += 1  ## record the not-mapping read number (2013-06-20)
                continue
            if len(loc_dict) > 0:  # --filter_dup has been set to True, so remove duplicate reads
                duplicate, loc_dict = DF(read_info, loc_dict, max_cov, single_on)
            else:
                duplicate = False
            if single_on:
                all_mapping_bp += len(read_info[5])  ## record the mapped basepairs (2013-06-20)
                if auto:
                    if read_l[0] != '':
                        original_length = int(read_l[sam_inf.index(sam)])
                    else:
                        original_length = ''
                    filter_mbias_trim_bp, filter_duplicate_reads = \
                        SF(read, strand_t, out, read_info, original_length, duplicate,
                           filter_mbias_trim_bp, filter_duplicate_reads)
                else:
                    if not duplicate and len(loc_dict) > 0:
                        out.write(read)  # no trimming; only output non-duplicate reads
                    else:
                        filter_duplicate_reads += 1  ## record the duplicate read (2013-06-20)
            else:
                all_mapping_bp += len(read_info[7])  ## record the mapped basepairs (2013-06-20)
                if auto or remove_overlap:
                    if read_l[0] != '':
                        original_length = [int(i) for i in read_l[sam_inf.index(sam)].split('-')]
                    else:
                        original_length = ''
                    filter_mbias_trim_bp, filter_duplicate_reads, filter_remove_overlap_bp = \
                        PF(read, strand_t, out, read_info, original_length, auto,
                           remove_overlap, duplicate, filter_mbias_trim_bp,
                           filter_duplicate_reads, filter_remove_overlap_bp)
                else:
                    if not duplicate and len(loc_dict) > 0:
                        out.write(read)  # no trimming; only output non-duplicate reads
                    else:
                        filter_duplicate_reads += 1  ## record the duplicate read (2013-06-20)
        out.close()

    ## produce the filter report
    info('Produce the report file...')
    report_out = open(name + "_BSeQC_mbias_filter_report.txt", 'w')
    report_out.write('Total reads: %d\n' % all_reads)
    if single_on:
        report_out.write('Not unique mapping reads: %d(%.2f%s all reads)\n' %
                         (not_mapping_reads, float(not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('Unique mapping reads: %d(%.2f%s all reads)\n' %
                         ((all_reads - not_mapping_reads),
                          float(all_reads - not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('Skip not unique mapping reads: %d(%.2f%s all reads)\n' %
                         (filter_not_mapping_reads,
                          float(filter_not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('In unique mapping reads:\n')
        report_out.write('All unique mapping basepairs: %d\n' % all_mapping_bp)
        report_out.write('Filter duplicate reads: %d(%.2f%s of unique mapping reads)\n' %
                         (filter_duplicate_reads,
                          float(filter_duplicate_reads) / (all_reads - not_mapping_reads) * 100, "%"))
        report_out.write('Filter Mbias basepairs: %d(%.2f%s of unique mapping basepairs)\n' %
                         (filter_mbias_trim_bp,
                          float(filter_mbias_trim_bp) / all_mapping_bp * 100, "%"))
    else:
        report_out.write('Not unique paired mapping reads: %d(%.2f%s)\n' %
                         (not_mapping_reads, float(not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('Unique paired mapping reads: %d(%.2f%s)\n' %
                         ((all_reads - not_mapping_reads),
                          float(all_reads - not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('Skip not paired unique mapping reads: %d(%.2f%s)\n' %
                         (filter_not_mapping_reads,
                          float(filter_not_mapping_reads) / all_reads * 100, "%"))
        report_out.write('In unique paired mapping reads:\n')
        report_out.write('All unique paired mapping basepairs: %d\n' % all_mapping_bp)
        report_out.write('Filter duplicate reads: %d(%.2f%s of unique paired mapping reads)\n' %
                         (filter_duplicate_reads,
                          float(filter_duplicate_reads) / (all_reads - not_mapping_reads) * 100, "%"))
        report_out.write('Filter Mbias basepairs: %d(%.2f%s of unique paired mapping basepairs)\n' %
                         (filter_mbias_trim_bp,
                          float(filter_mbias_trim_bp) / all_mapping_bp * 100, "%"))
        report_out.write('Filter overlapped basepairs: %d(%.2f%s of unique paired mapping basepairs)\n' %
                         (filter_remove_overlap_bp,
                          float(filter_remove_overlap_bp) / all_mapping_bp * 100, "%"))
    report_out.close()
    info('Got the report file!')
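# --- Hedged refactoring sketch (not part of the original module) ---
# Both report writers above repeat the pattern "count (percentage of base)" and
# divide by totals that would be zero for an empty SAM file. A small helper like
# this hypothetical pct() captures the computation once and guards the
# zero-denominator case; the %-formatting mirrors the report lines above.
def pct(count, total):
    """Return (count, share) for 'N(P%...)'-style report fields."""
    share = float(count) / total * 100 if total > 0 else 0.0
    return count, share
# Example (equivalent to one of the writes above, using %% for the literal sign):
#   report_out.write('Filter Mbias basepairs: %d(%.2f%% of unique mapping basepairs)\n'
#                    % pct(filter_mbias_trim_bp, all_mapping_bp))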
def parser_trim_sambam(sam_inf, ref, bsmp, s_path, dige_site, single_on,
                       remove_overlap, not_mapping, name):
    '''
    1. Scan each SAM file to calculate the methylation level of the end-repaired C in the MspI site.
    2. Trim the end-repaired C and adapter sequence by recognizing the MspI site.
    '''
    info('Scan each SAM file to calculate the methylation level of the end-repaired C in the MspI site,')
    info('and trim the end-repaired C and adapter sequence by recognizing the MspI site.')
    dige_dict = {}
    #dige_dict['++'] = {'s': [[], [0, 0]], 'e': [[], [0, 0]]}
    #dige_dict['-+'] = {'s': [[], [0, 0]], 'e': [[], [0, 0]]}
    #dige_dict['++'] = {'s': [0] * 22, 'e': [0] * 22}
    #dige_dict['-+'] = {'s': [0] * 22, 'e': [0] * 22}
    dige_dict['++'] = {'s': [0] * 2, 'e': [0] * 2}
    dige_dict['-+'] = {'s': [0] * 2, 'e': [0] * 2}
    if not single_on:
        #dige_dict['+-'] = {'s': [[], [0, 0]], 'e': [[], [0, 0]]}
        #dige_dict['--'] = {'s': [[], [0, 0]], 'e': [[], [0, 0]]}
        #dige_dict['+-'] = {'s': [0] * 22, 'e': [0] * 22}
        #dige_dict['--'] = {'s': [0] * 22, 'e': [0] * 22}
        dige_dict['+-'] = {'s': [0] * 2, 'e': [0] * 2}
        dige_dict['--'] = {'s': [0] * 2, 'e': [0] * 2}
    filter_remove_overlap_bp = 0
    filter_not_mapping_reads = 0
    all_reads = 0
    not_mapping_reads = 0
    all_mapping_bp = 0
    filter_MspI_end_repaired_bp = 0
    for s in range(len(sam_inf)):
        sam_format, read_inf = check.check_mapping_file_header(sam_inf[s], s_path)
        out_sam = sam_inf[s][:-4] + '_' + name + '_filter.sam'
        out = open(out_sam, 'w')
        record_mate = {}
        for read in read_inf:
            if read.startswith('@'):
                out.write(read)
                continue
            else:
                all_reads += 1
            # get read information and MspI site
            read_info, site_meth_list = DI.record_site(read, ref, bsmp, dige_site)
            if len(read_info) == 0:
                not_mapping_reads += 1
                if not_mapping:
                    out.write(read)
                else:
                    filter_not_mapping_reads += 1
                continue
            strand = read_info[1]
            if single_on:
                all_mapping_bp += len(read_info[5])
            else:
                all_mapping_bp += len(read_info[7])
            if len(site_meth_list) != 0:
                # record the methylation state of the MspI site
                dige_dict[strand]['s'] = [sum(x) for x in zip(dige_dict[strand]['s'], site_meth_list[:2])]
                dige_dict[strand]['e'] = [sum(x) for x in zip(dige_dict[strand]['e'], site_meth_list[2:])]
                #dige_dict[strand]['s'] = [sum(x) for x in zip(dige_dict[strand]['s'], site_meth_list[0] + site_meth_list[1])]
                #dige_dict[strand]['e'] = [sum(x) for x in zip(dige_dict[strand]['e'], site_meth_list[2] + site_meth_list[3])]
            # trim the end-repaired nucleotides and adapter sequence by the MspI site
            record_mate, filter_MspI_end_repaired_bp, filter_remove_overlap_bp = \
                RF.rrbs_trim(read, read_info, site_meth_list, single_on, remove_overlap,
                             record_mate, filter_MspI_end_repaired_bp,
                             filter_remove_overlap_bp, out)
        out.close()
    return (dige_dict, all_reads, all_mapping_bp, not_mapping_reads,
            filter_not_mapping_reads, filter_MspI_end_repaired_bp, filter_remove_overlap_bp)
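# --- Hedged worked example (illustrative values, not real data) ---
# The zip/sum accumulation above adds element-wise counts into dige_dict.
# Assuming site_meth_list packs four counters as
# [start_methylated, start_total, end_methylated, end_total] (an assumption
# consistent with the [0] * 2 initialisation of 's' and 'e'), the bookkeeping
# for one '++' read covering an MspI site would run like this:
def _dige_dict_example():
    dige_dict = {'++': {'s': [0] * 2, 'e': [0] * 2}}
    site_meth_list = [1, 1, 0, 1]   # hypothetical: 5' end-repaired C methylated, 3' C not
    dige_dict['++']['s'] = [sum(x) for x in zip(dige_dict['++']['s'], site_meth_list[:2])]
    dige_dict['++']['e'] = [sum(x) for x in zip(dige_dict['++']['e'], site_meth_list[2:])]
    return dige_dict    # {'++': {'s': [1, 1], 'e': [0, 1]}}
# The caller can then estimate the end-repair methylation level per strand,
# e.g. dige_dict['++']['s'][0] / float(dige_dict['++']['s'][1]).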