def main():
    """Print a tab-separated summary row for every PSL line of the input.

    The positional ``input`` argument names the PSL file to read; ``-``
    selects STDIN.  Each printed row holds, in order: the 1-based line
    number, ``qName``, ``tName``, the result of ``get_coverage()``,
    ``qSize`` and the result of ``get_quality()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help="PSLFILE or - for STDIN")
    args = parser.parse_args()
    inf = sys.stdin
    if args.input != '-':
        inf = open(args.input)
    try:
        for z, line in enumerate(inf, 1):
            p = PSL(line.rstrip())
            # A single parenthesised argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print(str(z) + "\t" + p.value('qName') + "\t" + p.value('tName')
                  + "\t" + str(p.get_coverage())
                  + "\t" + str(p.value('qSize'))
                  + "\t" + str(p.get_quality()))
    finally:
        # Release the handle even when processing a line raises.
        inf.close()
def main():
    """Summarise each PSL line of the input as one tab-separated row.

    Input comes from the file given as the positional ``input`` argument,
    or from STDIN when that argument is ``-``.  Columns written per line:
    1-based line number, ``qName``, ``tName``, ``get_coverage()``,
    ``qSize``, ``get_quality()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help="PSLFILE or - for STDIN")
    args = parser.parse_args()
    inf = sys.stdin
    if args.input != '-':
        inf = open(args.input)
    try:
        for z, line in enumerate(inf, 1):
            p = PSL(line.rstrip())
            # qName and tName are joined as-is (they must already be
            # strings, exactly as the plain concatenation required).
            columns = [
                str(z),
                p.value('qName'),
                p.value('tName'),
                str(p.get_coverage()),
                str(p.value('qSize')),
                str(p.get_quality()),
            ]
            # One-argument print(...) is valid and equivalent on both
            # Python 2 and Python 3.
            print("\t".join(columns))
    finally:
        # Always close the input, including on an exception mid-file.
        inf.close()
def read_next(self):
    """Return the next run of consecutive PSL lines sharing one qName.

    The run is collected into a ``MultiplePSLAlignments``.  The first line
    of the following run is kept in ``self.previous`` so the next call can
    start from it.  Returns ``None`` once no further valid line can be read
    to start a run.
    """
    group = MultiplePSLAlignments()
    first_line = self.previous
    if not first_line:
        # Nothing buffered: scan forward to the first valid PSL line.
        while True:
            first_line = self.fh.readline()
            if not first_line:
                return None
            if is_valid(first_line.rstrip()):
                break
    first_entry = PSL(first_line.rstrip())
    query_name = first_entry.value('qName')
    group.add_entry(first_entry)
    # The group holds at least one entry from here on, so every exit below
    # hands the group back.
    while True:
        line = self.fh.readline()
        if not line:
            # End of input: nothing left to buffer for a later call.
            self.previous = None
            return group
        if not is_valid(line):
            sys.stderr.write("Warning line is not a valid psl line\n" + line.rstrip() + "\n")
            continue  # skip bad lines as if they were never seen
        entry = PSL(line.rstrip())
        if entry.value('qName') != query_name:
            # A new query starts here; buffer its line for the next call.
            self.previous = line
            return group
        group.add_entry(entry)
def read_next(self):
    """Collect and return the next group of same-qName PSL entries.

    Starts from the line buffered in ``self.previous`` when there is one,
    otherwise from the next valid line read from ``self.fh``.  Reading
    stops at end of input or at the first valid line whose qName differs;
    that line is stored in ``self.previous``.  Returns a
    ``MultiplePSLAlignments``, or ``None`` when no starting line exists.
    """
    alignments = MultiplePSLAlignments()
    entry_count = 0
    if self.previous:
        # A line is already waiting from the previous call.
        seed_line = self.previous
    else:
        # First call (or buffer empty): prime from the file handle.
        while True:
            seed_line = self.fh.readline()
            if not seed_line:
                return None
            if is_valid(seed_line.rstrip()):
                break  # found a PSL line to start from
    seed = PSL(seed_line.rstrip())
    active_query = seed.value('qName')
    alignments.add_entry(seed)
    entry_count += 1
    while True:
        raw = self.fh.readline()
        if not raw:
            self.previous = None
            return alignments if entry_count > 0 else None
        if not is_valid(raw):
            sys.stderr.write("Warning line is not a valid psl line\n" + raw.rstrip() + "\n")
            continue  # ignore malformed lines entirely
        candidate = PSL(raw.rstrip())
        if candidate.value('qName') == active_query:
            # Still inside the current set of entries.
            alignments.add_entry(candidate)
            entry_count += 1
            continue
        # A different query name: buffer the line and hand back the set.
        self.previous = raw
        if entry_count > 0:
            return alignments
        sys.stderr.write("ERROR: How are we here?\n")
        sys.exit()
def _emit_psl(psl_line, serial, unique_names, out):
    """Write one converted PSL line to *out* and return its PSL object.

    When *unique_names* is true the query name is replaced with
    ``'Q' + str(serial)`` before the line is written.
    """
    pobj = PSL(psl_line)
    if unique_names:
        pobj.entry['qName'] = 'Q' + str(serial)
    out.write(pobj.get_line() + "\n")
    return pobj


def main():
    """Convert a SAM file (or STDIN) into PSL lines.

    Header lines are fed to the conversion factory; every other line is
    converted and written to ``--output`` (STDOUT when unset).  Optionally
    the SA:Z secondary and/or XA:Z alternative alignments of each line are
    converted as well, and the reads of converted primary alignments can
    be saved as FASTA or FASTQ.

    Exits with status 1 when a FASTA/FASTQ output is combined with any of
    the secondary/alternative options, or when a FASTQ record's sequence
    and quality lengths differ.
    """
    parser = argparse.ArgumentParser(
        description="Convert a sam file into a psl file")
    parser.add_argument('--genome',
                        help="FASTA input file of reference genome")
    parser.add_argument('--get_secondary_alignments', action='store_true',
                        help="Report SA:Z secondary alignments as well")
    parser.add_argument('--get_alternative_alignments', action='store_true',
                        help="Report XA:Z alternative alignments as well")
    parser.add_argument(
        '--get_all_alignments', action='store_true',
        help="Report SA:Z and XA:Z alternative alignments as well")
    parser.add_argument('--give_unique_names', action='store_true',
                        help="Output query names will be unique.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--output_fasta',
        help="FILENAME to save an outgoing fasta. Only works for primary alignments.")
    group.add_argument(
        '--output_fastq',
        help="FILENAME to save an outgoing fastq. Only works for primary alignments.")
    parser.add_argument('infile', help="FILENAME input file or '-' for STDIN")
    parser.add_argument('-o', '--output',
                        help="FILENAME for the output, STDOUT if not set.")
    args = parser.parse_args()

    reads_requested = args.output_fasta or args.output_fastq
    extras_requested = (args.get_secondary_alignments
                        or args.get_alternative_alignments
                        or args.get_all_alignments)
    if reads_requested and extras_requested:
        sys.stderr.write(
            "ERROR, can only output the fastq/fasta if we are doing primary alignments only.\n")
        # Non-zero status so callers can detect the failure.
        sys.exit(1)

    inf = sys.stdin
    if args.infile != '-':
        inf = open(args.infile)
    of = sys.stdout
    if args.output:
        of = open(args.output, 'w')
    spcf = SamBasics.SAMtoPSLconversionFactory()
    if args.genome:
        spcf.set_genome(args.genome)
    off = None
    if args.output_fasta:
        off = open(args.output_fasta, 'w')
    if args.output_fastq:
        off = open(args.output_fastq, 'w')

    z = 0  # running count of PSL lines written; used for unique names
    try:
        for line in inf:
            line = line.rstrip()
            if SamBasics.is_header(line):
                spcf.read_header_line(line)
                continue
            # We have a line to convert
            psl = spcf.convert_line(line)
            if psl:
                z += 1
                pobj = _emit_psl(psl, z, args.give_unique_names, of)
                if args.output_fastq or args.output_fasta:
                    sam = SamBasics.SAM(line)
                    sequence = sam.value('seq').upper()
                    quality = sam.value('qual')
                    if sam.check_flag(16):
                        # Flag 16 set: emit the reverse complement and the
                        # reversed quality string.
                        sequence = rc(sam.value('seq').upper())
                        quality = sam.value('qual')[::-1]
                    if args.output_fasta:
                        off.write(">" + pobj.value('qName') + "\n"
                                  + sequence + "\n")
                    elif args.output_fastq:
                        if len(sequence) != len(quality):
                            sys.stderr.write(
                                "ERROR: sequence " + sequence + " length ("
                                + str(len(sequence))
                                + ") doesnt match quality " + quality
                                + " length (" + str(len(quality)) + ")\n")
                            sys.exit(1)
                        off.write("@" + pobj.value('qName') + "\n"
                                  + sequence + "\n" + "+\n" + quality + "\n")
            # Lets look for secondary alignments to convert
            if args.get_secondary_alignments or args.get_all_alignments:
                for samline in SamBasics.get_secondary_alignments(line):
                    psl = spcf.convert_line(samline)
                    if psl:
                        z += 1
                        _emit_psl(psl, z, args.give_unique_names, of)
            if args.get_alternative_alignments or args.get_all_alignments:
                for samline in SamBasics.get_alternative_alignments(line):
                    psl = spcf.convert_line(samline)
                    if psl:
                        z += 1
                        _emit_psl(psl, z, args.give_unique_names, of)
    finally:
        # Close every handle on all exit paths, including the FASTA/FASTQ
        # file, so buffered output is flushed.
        inf.close()
        of.close()
        if off is not None:
            off.close()
def do_psl(args):
    """Print the sum of the ``blockSizes`` values of each PSL line.

    Iterates over ``args.input``, parses every item as a PSL entry and
    prints one total per entry.
    """
    for record in args.input:
        block_sizes = PSL(record).value('blockSizes')
        # Single-argument print(...) emits the same text as ``print cov``.
        print(sum(block_sizes))
def main():
    """Read SAM lines and write the corresponding PSL lines.

    Input is the positional ``infile`` (``-`` for STDIN); output goes to
    ``-o/--output`` or STDOUT.  Header lines are passed to the conversion
    factory.  For each other line the converted primary alignment is
    written, its read is optionally saved as FASTA/FASTQ, and the SA:Z
    secondary / XA:Z alternative alignments are converted when requested.

    Exits with status 1 if FASTA/FASTQ output is requested together with
    secondary/alternative reporting, or if a FASTQ record's sequence and
    quality strings differ in length.
    """
    parser = argparse.ArgumentParser(description="Convert a sam file into a psl file")
    parser.add_argument('--genome', help="FASTA input file of reference genome")
    parser.add_argument('--get_secondary_alignments', action='store_true', help="Report SA:Z secondary alignments as well")
    parser.add_argument('--get_alternative_alignments', action='store_true', help="Report XA:Z alternative alignments as well")
    parser.add_argument('--get_all_alignments', action='store_true', help="Report SA:Z and XA:Z alternative alignments as well")
    parser.add_argument('--give_unique_names', action='store_true', help="Output query names will be unique.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--output_fasta', help="FILENAME to save an outgoing fasta. Only works for primary alignments.")
    group.add_argument('--output_fastq', help="FILENAME to save an outgoing fastq. Only works for primary alignments.")
    parser.add_argument('infile', help="FILENAME input file or '-' for STDIN")
    parser.add_argument('-o', '--output', help="FILENAME for the output, STDOUT if not set.")
    args = parser.parse_args()

    if (args.output_fasta or args.output_fastq) and (args.get_secondary_alignments or args.get_alternative_alignments or args.get_all_alignments):
        sys.stderr.write("ERROR, can only output the fastq/fasta if we are doing primary alignments only.\n")
        # A bare sys.exit() would report success; signal the error.
        sys.exit(1)

    inf = sys.stdin
    if args.infile != '-':
        inf = open(args.infile)
    of = sys.stdout
    if args.output:
        of = open(args.output, 'w')
    spcf = SamBasics.SAMtoPSLconversionFactory()
    if args.genome:
        spcf.set_genome(args.genome)
    off = None
    if args.output_fasta:
        off = open(args.output_fasta, 'w')
    if args.output_fastq:
        off = open(args.output_fastq, 'w')

    # The option flags never change, so decide once which extra-alignment
    # lookups run for each line (secondary first, then alternative).
    extra_lookups = []
    if args.get_secondary_alignments or args.get_all_alignments:
        extra_lookups.append(SamBasics.get_secondary_alignments)
    if args.get_alternative_alignments or args.get_all_alignments:
        extra_lookups.append(SamBasics.get_alternative_alignments)

    z = 0  # number of PSL lines written so far; source of unique names
    try:
        for line in inf:
            line = line.rstrip()
            if SamBasics.is_header(line):
                spcf.read_header_line(line)
                continue
            # We have a line to convert
            psl = spcf.convert_line(line)
            if psl:
                pobj = PSL(psl)
                z += 1
                if args.give_unique_names:
                    pobj.entry['qName'] = 'Q' + str(z)
                of.write(pobj.get_line() + "\n")
                if args.output_fastq or args.output_fasta:
                    sam = SamBasics.SAM(line)
                    sequence = sam.value('seq').upper()
                    quality = sam.value('qual')
                    if sam.check_flag(16):
                        # Flag 16 set: write the reverse complement with
                        # the quality string reversed to match.
                        sequence = rc(sam.value('seq').upper())
                        quality = sam.value('qual')[::-1]
                    if args.output_fasta:
                        off.write(">" + pobj.value('qName') + "\n" + sequence + "\n")
                    elif args.output_fastq:
                        if len(sequence) == len(quality):
                            off.write("@" + pobj.value('qName') + "\n" + sequence + "\n" + "+\n" + quality + "\n")
                        else:
                            sys.stderr.write("ERROR: sequence " + sequence + " length (" + str(len(sequence)) + ") doesnt match quality " + quality + " length (" + str(len(quality)) + ")\n")
                            sys.exit(1)
            # Lets look for secondary / alternative alignments to convert
            for lookup in extra_lookups:
                for samline in lookup(line):
                    psl = spcf.convert_line(samline)
                    if psl:
                        z += 1
                        pobj = PSL(psl)
                        if args.give_unique_names:
                            pobj.entry['qName'] = 'Q' + str(z)
                        of.write(pobj.get_line() + "\n")
    finally:
        # Release all handles on every exit path; the FASTA/FASTQ file was
        # previously never closed.
        inf.close()
        of.close()
        if off is not None:
            off.close()