def main():
    """Echo a sorted SAM stream, optionally capping positional duplicates.

    Command line:
        input: BAM path handed to ``samtools view -h``, or ``-`` to read SAM
            text (header included) from STDIN.
        --positional_duplicates: maximum number of consecutive records that
            share the same tab-separated columns 3, 4 and 6 (RNAME, POS and
            CIGAR in the SAM format) to let through.  When the option is
            omitted or 0, every record is printed.

    Leading header lines, as recognised by ``is_header``, are printed
    unchanged.  Once the first non-header line has been seen, no later line
    is tested for being a header.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input', help="input sorted bam or - for STDIN. expects header")
    parser.add_argument(
        '--positional_duplicates',
        type=int,
        help="maximum number of positional duplicatse to allow through from a sorted sam")
    args = parser.parse_args()

    p = None
    if args.input == '-':
        stream = sys.stdin
    else:
        # An argv list keeps paths containing whitespace in one piece.
        p = Popen(['samtools', 'view', '-h', args.input], stdout=PIPE)
        stream = p.stdout

    in_header = True
    buffer_name = ''
    buffer_count = 0
    # Every line is read here, inside the loop, so the very first input line
    # (normally the first header line) is processed like any other instead
    # of being read once up front and then overwritten.
    while True:
        line = stream.readline()
        if not line:
            break
        if in_header:
            if is_header(line):
                # Single-argument print(...) behaves the same on Python 2.
                print(line.rstrip())
                continue
            in_header = False
        if not args.positional_duplicates:
            print(line.rstrip())
            continue
        f = line.split("\t")
        pos = ':'.join([f[2], f[3], f[5]])
        if pos != buffer_name:
            # New position: restart the run counter.
            buffer_count = 0
        buffer_name = pos
        buffer_count += 1
        if buffer_count <= args.positional_duplicates:
            print(line.rstrip())

    if p is not None:
        # Reap the samtools child, also when it produced no output at all.
        p.communicate()
def main():
    """Group decoded SAM records and hand each group to ``output_buffer``.

    Command line:
        input: SAM file path, or ``-`` for STDIN.
        -k/--fragment_size: expected query length (default 100); a warning
            is written to STDERR for every record whose sequence length
            differs.
        --threads, --type, --perbase: parsed here and passed along on
            ``args``; they are not read inside this function.
        -o/--output: output file path; STDOUT when not set.

    For every non-header line the query name is run through ``decode_name``
    and split on tabs.  Fields 2, 3 and 4 of that list are converted to
    ``int``, then the alignment count (the ``NH:i:`` value from the record's
    'remainder', or 0 when the cigar is ``*``) and the query length are
    appended.  Consecutive records sharing the same field 2 are collected
    and flushed together through ``output_buffer(buffer, args)``.

    Exits with status 1 if an aligned record carries no ``NH:i:`` tag.
    """
    parser = argparse.ArgumentParser(
        description="For every genepred entry report its alignability",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="STDIN is -")
    parser.add_argument('-k', '--fragment_size', default=100, type=int,
                        help="Fragment size to try to align")
    parser.add_argument('--threads', type=int, default=cpu_count(),
                        help="number of threads")
    parser.add_argument('--type', choices=['mean', 'median'], default='mean',
                        help="how to combine mappability fraction")
    parser.add_argument('--perbase', action='store_true',
                        help='show all averages')
    parser.add_argument('--output', '-o',
                        help="output file or STDOUT if not set")
    args = parser.parse_args()

    opened_input = args.input != '-'
    if opened_input:
        args.input = open(args.input)
    else:
        args.input = sys.stdin
    if args.output:
        args.output = open(args.output, 'w')
    else:
        args.output = sys.stdout

    # Raw string: same pattern text, without the invalid "\d" string escape.
    # Compiled once instead of on every record.
    nh_tag = re.compile(r'NH:i:(\d+)')

    buffer = []
    prev = -1
    try:
        for line in args.input:
            if is_header(line):
                continue
            sam = SAM(line)
            name = decode_name(sam.value('qname')).split("\t")
            qlen = len(sam.value('seq'))
            if qlen != args.fragment_size:
                sys.stderr.write("WARNING qlen != fragment_size\n")
            cnt = 0
            if sam.value('cigar') != '*':
                m = nh_tag.search(sam.value('remainder'))
                if not m:
                    sys.stderr.write("ERROR not hisat format\n")
                    # Non-zero status so callers can detect the failure;
                    # a bare sys.exit() reports success.
                    sys.exit(1)
                cnt = int(m.group(1))
            for i in (2, 3, 4):
                name[i] = int(name[i])
            name.append(cnt)
            name.append(qlen)
            if name[2] != prev:
                # Field 2 changed: flush the finished group first.
                if buffer:
                    output_buffer(buffer, args)
                buffer = []
            prev = name[2]
            buffer.append(name)
        if buffer:
            output_buffer(buffer, args)
    finally:
        if opened_input:
            args.input.close()
def main():
    """Pass a sorted SAM stream through, limiting positional duplicates.

    Command line:
        input: BAM path given to ``samtools view -h``, or ``-`` to read SAM
            text (with header) from STDIN.
        --positional_duplicates: how many consecutive records with identical
            tab-separated columns 3, 4 and 6 (RNAME, POS and CIGAR in the
            SAM format) may be printed.  If the option is missing or 0, all
            records are printed.

    Header lines at the top of the stream, as judged by ``is_header``, are
    printed as they are.  After the first non-header line, lines are no
    longer checked for being headers.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help="input sorted bam or - for STDIN. expects header")
    parser.add_argument('--positional_duplicates', type=int, help="maximum number of positional duplicatse to allow through from a sorted sam")
    args = parser.parse_args()

    p = None
    stream = sys.stdin
    if args.input != '-':
        # Pass argv as a list so a path with spaces is not split apart.
        p = Popen(['samtools', 'view', '-h', args.input], stdout=PIPE)
        stream = p.stdout

    in_header = True
    buffer_name = ''
    buffer_count = 0
    while True:
        # The only read in the function: reading one line ahead of the loop
        # and then reading again here used to discard the first input line.
        line = stream.readline()
        if not line:
            break
        if in_header:
            if is_header(line):
                # print(...) with one argument is identical on Python 2.
                print(line.rstrip())
                continue
            in_header = False
        if args.positional_duplicates:
            f = line.split("\t")
            pos = ':'.join([f[2], f[3], f[5]])
            if pos != buffer_name:
                # Different position from the previous record: new run.
                buffer_count = 0
            buffer_name = pos
            buffer_count += 1
            if buffer_count <= args.positional_duplicates:
                print(line.rstrip())
        else:
            print(line.rstrip())

    if p is not None:
        # Wait for samtools to finish, including on empty output.
        p.communicate()
def main():
    """Print the header plus the records whose ``NH:i:`` value is 1.

    Command line:
        input: BAM path handed to ``samtools view -F 4 -h``, or ``-`` to
            read SAM text from STDIN.

    Header lines (per ``is_header``) are printed unchanged.  Records whose
    cigar is ``*`` are skipped.  Every remaining record must carry an
    ``NH:i:<n>`` tag in its 'remainder' field; it is printed only when
    ``n`` equals 1.

    Exits with status 1 if a record with a cigar has no ``NH:i:`` tag.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help="BAM FILE or '-' for STDIN SAM format")
    args = parser.parse_args()

    inf = sys.stdin
    p = None
    if args.input != '-':
        # An argv list keeps paths containing whitespace intact.
        p = Popen(['samtools', 'view', '-F', '4', '-h', args.input],
                  stdout=PIPE)
        inf = p.stdout

    # Raw string: same pattern text, without the invalid "\d" string escape.
    nh_tag = re.compile(r'NH:i:(\d+)')
    try:
        for line in inf:
            if is_header(line):
                # Single-argument print(...) behaves the same on Python 2.
                print(line.rstrip())
                continue
            sam = SAM(line)
            if sam.entry['cigar'] == '*':
                continue
            m = nh_tag.search(sam.entry['remainder'])
            if not m:
                sys.stderr.write("ERROR not a hisat entry\n")
                # Non-zero status: a bare sys.exit() would report success.
                sys.exit(1)
            if int(m.group(1)) == 1:
                print(line.rstrip())
    finally:
        if p is not None:
            # Close our end of the pipe and reap the child on every exit
            # path, so samtools is not left behind after an early exit.
            p.stdout.close()
            p.wait()
def main():
    """Emit the SAM header and only the alignments reported exactly once.

    Command line:
        input: BAM path given to ``samtools view -F 4 -h``, or ``-`` to read
            SAM text from STDIN.

    Lines that ``is_header`` accepts are printed as they are.  A record
    whose cigar is ``*`` is dropped.  For all other records the ``NH:i:<n>``
    tag is looked up in the record's 'remainder' field and the line is
    printed only if ``n`` is 1.

    Exits with status 1 when a record with a cigar lacks the ``NH:i:`` tag.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input', help="BAM FILE or '-' for STDIN SAM format")
    args = parser.parse_args()

    p = None
    if args.input == '-':
        inf = sys.stdin
    else:
        # Pass argv as a list so a path with spaces is not split apart.
        p = Popen(
            ['samtools', 'view', '-F', '4', '-h', args.input], stdout=PIPE)
        inf = p.stdout

    # Raw string keeps the pattern text while avoiding the invalid "\d"
    # string escape; compiled once rather than per line.
    nh_tag = re.compile(r'NH:i:(\d+)')
    try:
        for line in inf:
            if is_header(line):
                # print(...) with one argument is identical on Python 2.
                print(line.rstrip())
                continue
            sam = SAM(line)
            if sam.entry['cigar'] == '*':
                continue
            m = nh_tag.search(sam.entry['remainder'])
            if not m:
                sys.stderr.write("ERROR not a hisat entry\n")
                # Exit non-zero so the failure is visible to the caller.
                sys.exit(1)
            if int(m.group(1)) == 1:
                print(line.rstrip())
    finally:
        if p is not None:
            # Release the pipe and wait for samtools on every exit path.
            p.stdout.close()
            p.wait()