def main( argv = None ):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads predictions from stdin, maps their subject (genomic) coordinates
    onto a new assembly either via explicit alignments or via
    breakpoint/offset tables (``--filename-map`` together with
    ``--input-format``), optionally verifies the translation against the
    new genome and re-predicts entries whose translation changed, and
    writes the mapped predictions to ``options.stdout``.  Summary counts
    are written to ``options.stdlog``; per-entry problems are reported on
    ``options.stderr``.
    """

    if argv is None:
        argv = sys.argv
    ## NOTE(review): *argv* is not forwarded to E.Start() here -- confirm
    ## whether E.Start() accepts it before relying on the argument.

    ## The parser has to be created before options can be added to it.
    parser = E.OptionParser( version = "%prog version: $Id$",
                             usage = globals()["__doc__"] )

    parser.add_option("-m", "--filename-map", dest="filename_map", type="string",
                      help="filename with mapping information.")
    parser.add_option("-o", "--pattern-old", dest="pattern_old", type="string",
                      help="pattern for mapping new to old identifiers: extract string from old.")
    parser.add_option("-n", "--pattern-new", dest="pattern_new", type="string",
                      help="pattern for mapping new to old identifiers: put string into new.")
    parser.add_option("-g", "--genome-file", dest="genome_file", type="string",
                      help="genome_file.")
    parser.add_option("-p", "--peptides", dest="filename_peptides", type = "string",
                      help="filename with peptide sequences.")
    parser.add_option("-f", "--input-format", dest="input_format", type="choice",
                      help="format of mapping file",
                      choices=("alignment", "offsets") )
    parser.add_option("-i", "--write-missed", dest="write_missed", type="string",
                      help="write missed identifiers to separate file.")
    parser.add_option("-a", "--filename-genes", dest="filename_genes", type="string",
                      help="filename with gene information.")
    parser.add_option("--filename-old-peptides", dest="filename_old_peptides", type="string",
                      help="filename with old peptide information.")
    parser.add_option("--no-renumber", dest="renumber", action="store_false",
                      help="do not renumber predictions.")
    parser.add_option("--contig-sizes-old", dest="contig_sizes_old", type="string",
                      help="contig sizes for old data.")
    parser.add_option("--contig-sizes-new", dest="contig_sizes_new", type="string",
                      help="contig sizes for new data.")
    parser.add_option("--skip-errors", dest="skip_errors", action="store_true",
                      help="skip entries with errors.")

    parser.set_defaults(
        filename_map = None,
        pattern_old = "(.+)",
        pattern_new = "%s",
        genome_file = None,
        filename_peptides = None,
        write_missed = None,
        filename_genes = None,
        filename_old_peptides = None,
        renumber = True,
        input_format = "alignment",
        contig_sizes_old = None,
        contig_sizes_new = None,
        skip_errors = None )

    (options, args) = E.Start( parser, add_pipe_options = True)

    predictor = PredictorExonerate()

    ## the different mapping criteria
    map_sbjcts = {}
    breakpoints = {}
    ## only filled for the "offsets" format; defined here so that they
    ## always exist.
    endpoints = {}
    offsets = {}

    ################################################################################################
    ## map of transcript to gene: first two tab-separated columns are
    ## gene and transcript, lines starting with "#" are comments.
    map_transcript2gene = {}
    if options.filename_genes:
        with open(options.filename_genes, "r") as infile:
            for line in infile:
                if line.startswith("#"):
                    continue
                ## strip only the newline, so that a final line without
                ## a trailing newline does not lose a character.
                gene, transcript = line.rstrip("\n").split("\t")[:2]
                map_transcript2gene[transcript] = gene

    ################################################################################################
    peptides = {}
    if options.filename_peptides:
        with open(options.filename_peptides, "r") as infile:
            peptides = Genomics.ReadPeptideSequences( infile )
        options.stdlog.write( "# read %i peptide sequences.\n" % len(peptides))

    ################################################################################################
    ## read old query sequences and compare against new query sequences
    ## this can be used to build a map between old and new queries
    query_map_old2new = {}
    ## defaults keep the --write-missed section working when no old
    ## peptides were supplied.
    unmappable = ()
    unmapped = ()
    if options.filename_old_peptides:
        with open(options.filename_old_peptides, "r") as infile:
            old_peptides = Genomics.ReadPeptideSequences( infile )
        options.stdlog.write( "# read %i old peptide sequences.\n" % len(old_peptides))
        query_map_old2new, unmappable, unmapped = Genomics.MapSequences( old_peptides, peptides)
        options.stdlog.write( "# built map: unmappable=%i unmapped=%i.\n" % (len(unmappable), len(unmapped)))
        if options.loglevel >= 2:
            options.stdlog.write( "# unmappable: %s.\n" % ";".join(unmappable))
            options.stdlog.write( "# unmapped: %s.\n" % ";".join(unmapped))

    ################################################################################################
    ## read old/new contig sizes for mapping positive/negative coordinates
    contig_sizes_old = {}
    contig_sizes_new = {}
    if options.contig_sizes_old:
        with open(options.contig_sizes_old, "r") as infile:
            contig_sizes_old = Genomics.ReadContigSizes( infile )
    if options.contig_sizes_new:
        with open(options.contig_sizes_new, "r") as infile:
            contig_sizes_new = Genomics.ReadContigSizes( infile )

    ################################################################################################
    if options.filename_map:
        with open(options.filename_map) as infile:
            ## the option value is "alignment" (see choices/default);
            ## "alignments" is accepted as well for backward compatibility
            ## with the name that was previously tested for here.
            if options.input_format in ("alignment", "alignments"):
                for line in infile:
                    if line.startswith("#"):
                        continue
                    x, old_token, old_from, old_to, old_ali, new_from, new_to, new_ali = \
                        line.rstrip("\n").split("\t")
                    map_sbjcts[old_token] = (old_from, old_ali, new_from, new_ali)

                if options.loglevel >= 1:
                    options.stdlog.write( "# read %i alignments.\n" % len(map_sbjcts))

            elif options.input_format == "offsets":
                ## input is a list of segments and their offsets.
                breakpoints, endpoints, offsets = ReadOffsets( infile )
                if options.loglevel >= 1:
                    options.stdlog.write( "# read breakpoints for %i chromosomes.\n" % len(breakpoints))

    ################################################################################################
    ## end of input section
    ################################################################################################

    rx = re.compile(options.pattern_old)
    last_sbjct_token = None
    ninput = 0
    nerrors = 0
    nerrors_map = 0
    nerrors_inconsistencies = 0
    nerrors_boundaries = 0
    nerrors_translation = 0
    nerrors_inconsequential = 0
    nerrors_realigned = 0
    nmapped = 0
    nfiltered = 0
    naligned = 0
    noutput = 0
    found_transcripts = {}
    nduplicates = 0
    ## hids of all entries written so far, used for duplicate detection
    output = {}

    for line in sys.stdin:

        if line.startswith("#"):
            continue

        entry = PredictionParser.PredictionParserEntry()
        entry.Read( line )
        ninput += 1

        is_positive = entry.mSbjctStrand == "+"
        is_error = False

        ## check if query token is mappable: using sequence map
        if query_map_old2new:
            if entry.mQueryToken not in query_map_old2new:
                options.stdlog.write("# skipping prediction %i: obsolete query %s\n" % (entry.mPredictionId, entry.mQueryToken) )
                nfiltered += 1
                continue
        ## check if query token is mappable: using filter
        elif peptides and entry.mQueryToken not in peptides:
            options.stdlog.write("# skipping prediction %i: obsolete query %s\n" % (entry.mPredictionId, entry.mQueryToken) )
            nfiltered += 1
            continue

        new_sbjct_token = options.pattern_new % rx.search(entry.mSbjctToken).groups()[0]

        ## Defaults for the coordinates used in messages and in the
        ## re-prediction below, so that they are defined even if the
        ## contig is in neither the alignment map nor the offset table.
        old_f, old_t = entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo
        f, t = old_f, old_t
        new_f, new_t = old_f, old_t

        ##########################################################################################################
        ## Map via alignments
        if entry.mSbjctToken in map_sbjcts:
            nmapped += 1

            ## the alignment object is rebuilt only when the contig changes
            if last_sbjct_token != entry.mSbjctToken:
                old_from, old_ali, new_from, new_ali = map_sbjcts[entry.mSbjctToken]
                map_a2b = alignlib_lite.makeAlignmentVector()
                alignlib_lite.AlignmentFormatExplicit(
                    int(old_from), old_ali,
                    int(new_from), new_ali).copy( map_a2b )
                last_sbjct_token = entry.mSbjctToken

            if options.loglevel >= 3:
                sys.stdout.write( "# %s\n" % str(entry) )
                sys.stdout.write( "# %s\n" % str(map_sbjcts[entry.mSbjctToken]) )
                sys.stdout.flush()

            old_f, old_t = entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo

            ## convert to forward coordinates:
            if is_positive:
                f, t = old_f, old_t
                first_res, last_res = f + 1, t
            else:
                f, t = map_a2b.getRowTo() - old_f, map_a2b.getRowTo() - old_t
                first_res, last_res = f, t + 1

            ## map first and last residues
            mfirst_res = map_a2b.mapRowToCol( first_res )
            mlast_res = map_a2b.mapRowToCol( last_res )

            if (mfirst_res == 0 and old_f != 0) or (mlast_res == 0 and old_t != map_a2b.getRowTo() ):
                options.stderr.write("# mapping not possible for prediction %i on %s %s:%i-%i -> %i-%i -> %i-%i -> %i-%i -> %i-%i\n" % \
                                     (entry.mPredictionId, entry.mSbjctToken, entry.mSbjctStrand,
                                      old_f, old_t, f, t, first_res, last_res, mfirst_res, mlast_res, f, t))
                options.stderr.write("# %s\n" % str(map_sbjcts[entry.mSbjctToken]))
                options.stderr.write("# %s\n" % str(entry))
                options.stderr.flush()
                nerrors_boundaries += 1
                is_error = True

                ## get extended boundaries for alignment later on
                ## NOTE(review): the nesting of these two loops inside the
                ## error branch is reconstructed from the collapsed source.
                while mfirst_res == 0 and first_res > 1:
                    first_res -= 1
                    mfirst_res = map_a2b.mapRowToCol(first_res)
                while mlast_res == 0 and last_res < map_a2b.getRowTo():
                    last_res += 1
                    mlast_res = map_a2b.mapRowToCol(last_res)

            ## convert to genomic coordinates
            if is_positive:
                new_f = mfirst_res - 1
                new_t = mlast_res
            else:
                ## convert negative strand coordinates
                new_f = map_a2b.getColTo() - mfirst_res
                new_t = map_a2b.getColTo() - (mlast_res - 1)

            ## Now map the alignment.
            try:
                MapAlignment( entry, map_a2b )
            except ValueError:
                options.stderr.write("# alignment mapping not possible for prediction %i on %s %s:%i-%i -> %i-%i -> %i-%i -> %i-%i -> %i-%i -> %i-%i\n" % \
                                     (entry.mPredictionId, entry.mSbjctToken, entry.mSbjctStrand,
                                      old_f, old_t, f, t, first_res, last_res, mfirst_res, mlast_res,
                                      new_f, new_t, entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo))
                options.stderr.write("# %s\n" % str(map_sbjcts[entry.mSbjctToken]))
                options.stderr.flush()
                nerrors_map += 1
                is_error = True

            if new_f != entry.mSbjctGenomeFrom or new_t != entry.mSbjctGenomeTo:
                options.stderr.write("# mapping inconsistency for prediction %i on %s %s:%i-%i -> %i-%i -> %i-%i -> %i-%i -> %i-%i <> %i-%i\n" % \
                                     (entry.mPredictionId, entry.mSbjctToken, entry.mSbjctStrand,
                                      old_f, old_t, f, t, first_res, last_res, mfirst_res, mlast_res,
                                      new_f, new_t, entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo))
                nerrors_inconsistencies += 1
                is_error = True

        ##########################################################################################################
        ## Map via offsets
        if entry.mSbjctToken in breakpoints:
            old_f, old_t = entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo

            ## convert to forward coordinates:
            if is_positive:
                f, t = old_f, old_t
            else:
                f, t = contig_sizes_old[entry.mSbjctToken] - old_t, contig_sizes_old[entry.mSbjctToken] - old_f

            o1 = GetOffset( f,
                            breakpoints[entry.mSbjctToken],
                            endpoints[entry.mSbjctToken],
                            offsets[entry.mSbjctToken] )
            o2 = GetOffset( t,
                            breakpoints[entry.mSbjctToken],
                            endpoints[entry.mSbjctToken],
                            offsets[entry.mSbjctToken] )

            ## different offsets for start and end: a breakpoint lies
            ## within the prediction.
            if o1 != o2:
                options.stderr.write("# break within gene %s\n" % str(entry))
                nerrors_map += 1
                is_error = True

            f += o1
            t += o2

            if not is_positive:
                f, t = contig_sizes_new[entry.mSbjctToken] - t, contig_sizes_new[entry.mSbjctToken] - f

            entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo = f, t
            ## keep the re-prediction window below in sync with the
            ## coordinates mapped via offsets.
            new_f, new_t = f, t

            if entry.mSbjctGenomeFrom > entry.mSbjctGenomeTo:
                options.stderr.write("# mapping error: start after end %s\n" % str(entry))
                nerrors_map += 1
                is_error = True

        ##########################################################################################################
        ## do translation check, if genome is given
        if options.genome_file:
            genomic_sequence = Genomics.GetGenomicSequence( new_sbjct_token, entry.mSbjctStrand,
                                                            entry.mSbjctGenomeFrom,
                                                            entry.mSbjctGenomeTo,
                                                            options.genome_file,
                                                            loglevel = 0)

            map_peptide2translation, translation = Genomics.Alignment2PeptideAlignment( \
                entry.mMapPeptide2Genome, entry.mQueryFrom, 0, genomic_sequence )

            ## masked residues (X) are ignored in the comparison
            if re.sub("X", "", translation) != re.sub("X", "", entry.mTranslation):
                options.stderr.write("# translation error for prediction %i on %s %s:%i-%i -> %i-%i <> %i-%i\n" % \
                                     (entry.mPredictionId, entry.mSbjctToken, entry.mSbjctStrand,
                                      old_f, old_t, f, t,
                                      entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo))
                ## only contigs that were mapped via alignments have an entry
                if entry.mSbjctToken in map_sbjcts:
                    options.stderr.write("# %s\n" % str(map_sbjcts[entry.mSbjctToken]))
                options.stderr.write("# old=%s\n# new=%s\n" % (entry.mTranslation, translation))
                options.stderr.write("# old=%s\n# new=%s\n" % (entry.mAlignmentString, Genomics.Alignment2String(entry.mMapPeptide2Genome)))
                nerrors_translation += 1
                is_error = True

                if peptides and entry.mQueryToken in peptides:
                    naligned += 1
                    options.stdlog.write( "# aligning: %s versus %s:%s: %i-%i\n" % ( \
                        entry.mQueryToken,
                        new_sbjct_token, entry.mSbjctStrand,
                        entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo))

                    # do a quick reprediction
                    ## the genome is given by --genome-file; there is no
                    ## "genome_pattern" option.
                    genomic_sequence = Genomics.GetGenomicSequence( new_sbjct_token, entry.mSbjctStrand,
                                                                    0, 0,
                                                                    genome_file = options.genome_file,
                                                                    loglevel = 0)
                    predictor.mLogLevel = 0
                    result = predictor(entry.mQueryToken, peptides[entry.mQueryToken],
                                       entry.mSbjctToken, genomic_sequence,
                                       "--exhaustive --subopt FALSE --score '%s' " % str(80),
                                       new_f - 10, new_t + 10)
                    prediction_id = entry.mPredictionId
                    if result:
                        entry = result[0]
                        entry.mPredictionId = prediction_id
                        nerrors_realigned += 1
            else:
                ## a mapping error that did not change the translation
                ## NOTE(review): attaching this branch to the translation
                ## comparison is reconstructed from the collapsed source.
                if is_error:
                    nerrors_inconsequential += 1

        entry.mSbjctToken = new_sbjct_token

        ## map query tokens
        if query_map_old2new:
            query_tokens = query_map_old2new[entry.mQueryToken]
        else:
            query_tokens = (entry.mQueryToken,)

        ## total number of entries with at least one error
        if is_error:
            nerrors += 1

        if options.skip_errors and is_error:
            continue

        for query_token in query_tokens:
            entry.mQueryToken = query_token
            prediction_id = entry.mPredictionId

            ## the hid is computed with the prediction id zeroed, so that
            ## entries differing only in their id count as duplicates.
            entry.mPredictionId = 0
            hid = Genomics.GetHID( str(entry) )
            if hid in output:
                nduplicates += 1
                continue
            ## remember the entry, otherwise duplicates are never detected
            output[hid] = 1

            noutput += 1
            if options.renumber:
                prediction_id = noutput
            entry.mPredictionId = prediction_id

            options.stdout.write( str(entry) + "\n")
            options.stdout.flush()
            found_transcripts[entry.mQueryToken] = 1

    ## write out found transcripts and genes
    nmissed_transcripts = 0
    missed_transcripts = []
    found_genes = {}
    if peptides:
        for x in peptides:
            if x not in found_transcripts:
                nmissed_transcripts += 1
                missed_transcripts.append( x )
            elif x in map_transcript2gene:
                ## transcripts without gene information are skipped
                found_genes[map_transcript2gene[x]] = 1

    missed_genes = {}
    nmissed_genes = 0
    if map_transcript2gene:
        for t in missed_transcripts:
            if t not in map_transcript2gene:
                continue
            g = map_transcript2gene[t]
            if g not in found_genes:
                missed_genes[g] = 1
        nmissed_genes = len(missed_genes)

    if options.write_missed:
        with open(options.write_missed, "w") as outfile:
            for x in missed_transcripts:
                if x in unmapped:
                    status = "unmapped"
                else:
                    status = "mapped"
                outfile.write( "%s\t%s\t%s\n" % ("transcript", x, status ))
            for x in missed_genes:
                status = "unknown"
                outfile.write( "%s\t%s\t%s\n" % ("gene", x, status ))

    options.stdlog.write("# input=%i, output=%i, filtered=%i, nduplicates=%i, mapped=%i, errors=%i\n" % (\
        ninput, noutput, nfiltered, nduplicates, nmapped, nerrors ))
    options.stdlog.write("# errors: inconsequental=%i, boundaries=%i, mapping=%i, inconsistencies=%i, translation=%i, realigned=%i\n" % (\
        nerrors_inconsequential, nerrors_boundaries, nerrors_map, nerrors_inconsistencies, nerrors_translation, nerrors_realigned ))
    options.stdlog.write("# peptides: input=%i, found=%i, missed=%i, found_genes=%i, missed_genes=%i\n" % (\
        len(peptides), len(found_transcripts), nmissed_transcripts, len(found_genes), nmissed_genes) )

    E.Stop()
def main( argv = None ):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads predictions from ``options.stdin`` and, for each of them,

    1. extends the prediction upstream to a start codon and downstream
       to a stop codon, searching within the configured boundaries, and
    2. optionally fills introns (``--fill-introns``) whose length is a
       multiple of three and which contain no more than
       ``--introns-max-stops`` stop codons/gaps.

    Depending on ``--output-format`` the updated predictions, a table of
    extensions or the filled introns are written to ``options.stdout``.
    """

    if argv is None:
        argv = sys.argv
    ## NOTE(review): *argv* is not forwarded to E.Start() here -- confirm
    ## whether E.Start() accepts it before relying on the argument.

    parser = E.OptionParser( version = "%prog version: $Id: gpipe/predictions2transcripts.py 1841 2008-05-08 12:07:13Z andreas $",
                             usage = globals()["__doc__"] )

    parser.add_option("-g", "--genome-file", dest="genome_file", type="string",
                      help="filename with genome." )
    parser.add_option("-o", "--output-filename-summary", dest="output_filename_summary", type="string",
                      help="filename with summary information." )
    parser.add_option( "--skip-header", dest="skip_header", action="store_true",
                       help="skip header." )
    parser.add_option( "--start-codon-boundary", dest="start_codon_boundary", type="int",
                       help="maximum extension for start codon (make divisible by 3)." )
    parser.add_option( "--stop-codon-boundary", dest="stop_codon_boundary", type="int",
                       help="maximum extension for stop codon (make divisible by 3)." )
    parser.add_option( "--left-extension-mode", dest="left_extension_mode", type="choice",
                       choices=("first-start", "first-stop-backtrack"),
                       help="extension mode for 5' end.")
    parser.add_option( "--fill-introns", dest="fill_introns", type="int",
                       help="fill intron if divisible by three and no stop codon up to a maximum length of #." )
    parser.add_option( "--introns-max-stops", dest="introns_max_stops", type="int",
                       help="maximum number of stop codons to tolerate within an intron." )
    parser.add_option( "--output-format", dest="output_format", type="choice",
                       choices=("predictions", "extensions", "filled-introns"),
                       help="output format." )

    parser.set_defaults(
        genome_file = "genome",
        ## a one-element tuple: with a plain string the ``in`` tests
        ## below would do substring matching ("AT" or "" would match).
        start_codons = ("ATG",),
        stop_codons = ("TAG", "TAA", "TGA"),
        start_codon_boundary = 9999,
        stop_codon_boundary = 9999,
        fill_introns = 0,
        introns_max_stops = 0,
        left_splice_signals = ("GT",),
        right_splice_signals = ("AG",),
        output_format="extensions",
        left_extension_mode = "first-start",
        skip_header = False,
        output_filename_summary = None,
        )

    (options, args) = E.Start( parser, add_pipe_options = True )

    if len(args) > 0:
        sys.stdout.write( "%s no arguments required.\n" % USAGE )
        sys.exit(2)

    ## NOTE(review): the boundaries are divided by three here, but are
    ## added to/subtracted from genomic coordinates below -- confirm
    ## that this is the intended unit.
    options.start_codon_boundary = options.start_codon_boundary // 3
    options.stop_codon_boundary = options.stop_codon_boundary // 3

    fasta = IndexedFasta.IndexedFasta( options.genome_file )

    p = PredictionParser.PredictionParserEntry()

    ninput, noutput = 0, 0
    nfilled = 0
    nseqs_filled = 0
    nseqs_extended = 0
    left_extensions = []
    right_extensions = []
    filled_introns = []

    if not options.skip_header:
        if options.output_format == "predictions":
            options.stdout.write( Prediction.Prediction().getHeader() + "\n" )
        elif options.output_format == "filled-introns":
            options.stdout.write("\t".join( ("prediction_id",
                                             "intron",
                                             "peptide_sequence",
                                             "genomic_sequence") ) + "\n" )

    if options.output_filename_summary:
        outfile_summary = open (options.output_filename_summary, "w" )
        outfile_summary.write( "id\ttype\tnumber\tlength\tfrom\tto\tsequence\n" )
    else:
        outfile_summary = None

    for line in options.stdin:

        if line.startswith("#"):
            continue

        ninput += 1
        p.Read(line)

        lsequence = fasta.getLength( p.mSbjctToken )

        ## window around the prediction that is searched for codons
        genome_from = max( 0, p.mSbjctGenomeFrom - options.start_codon_boundary)
        genome_to = min( lsequence, p.mSbjctGenomeTo + options.stop_codon_boundary)

        genomic_sequence = fasta.getSequence( p.mSbjctToken, p.mSbjctStrand,
                                              genome_from,
                                              genome_to ).upper()

        ## Defaults for the case that no extension is attempted, so that
        ## the "extensions" output below is always defined.
        found_start = False
        found_stop = 0
        dstart, dstop = 0, 0

        ########################################################################
        ## Do extensions
        if options.start_codon_boundary or options.stop_codon_boundary:

            ## positions of the prediction within genomic_sequence
            extension_start = p.mSbjctGenomeFrom - genome_from
            extension_stop = genome_to - p.mSbjctGenomeTo
            fragment_to = extension_start + p.mSbjctGenomeTo - p.mSbjctGenomeFrom
            lfragment = len(genomic_sequence)

            ####################################################################
            ## find start codon
            start = extension_start
            found_start = False
            if options.left_extension_mode == "first-start":
                found_start, start = findCodonReverse( genomic_sequence,
                                                       start,
                                                       options.start_codons,
                                                       options.stop_codons )

            elif options.left_extension_mode == "first-stop-backtrack":
                if genomic_sequence[start:start+3] in options.start_codons:
                    found_start = True
                else:
                    found_start, start = findCodonReverse( genomic_sequence,
                                                           start,
                                                           options.stop_codons )
                    if found_start:
                        E.info("prediction %s: stop found at %i (%i) backtracking ..." % ( p.mPredictionId, start, extension_start - start) )

                        ## bracktrack to first start codon
                        found_start = False
                        while start < extension_start:
                            start += 3
                            if genomic_sequence[start:start+3] in options.start_codons:
                                found_start = True
                                break
                        else:
                            ## no start codon between stop and prediction
                            start = extension_start

                        if found_start:
                            E.info("start codon found at %i (%i)." % ( start, extension_start - start) )
                        else:
                            E.info("no start codon found." )
                    else:
                        E.info("prediction %s: no stop found ... backtracking to start codon." % ( p.mPredictionId ) )

                        found_start, start = findCodonReverse( genomic_sequence,
                                                               start,
                                                               options.start_codons )

                        ## report the failure only if the search failed
                        if not found_start:
                            E.info("prediction %s: no start codon found." % ( p.mPredictionId ) )

            if found_start:
                start += genome_from
            else:
                start = p.mSbjctGenomeFrom

            dstart = p.mSbjctGenomeFrom - start

            ####################################################################
            ## find stop codon
            ## stop points to the beginning of the codon, thus the stop codon will
            ## not be part of the sequence.
            stop = fragment_to
            found_stop = 0
            while stop < lfragment and \
                  genomic_sequence[stop:stop+3] not in ("NNN", "XXX"):
                if genomic_sequence[stop:stop+3] in options.stop_codons:
                    found_stop = 1
                    break
                stop += 3

            if found_stop:
                stop += genome_from
            else:
                stop = p.mSbjctGenomeTo

            dstop = stop - p.mSbjctGenomeTo

            ####################################################################
            ## build new prediction
            map_peptide2genome = []
            if dstart:
                map_peptide2genome.append( ("G", 0, dstart) )
            map_peptide2genome += p.mMapPeptide2Genome
            if dstop:
                map_peptide2genome.append( ("G", 0, dstop) )

            E.info("prediction %s: extension: found_start=%i, found_stop=%i, left=%i, right=%i" % ( p.mPredictionId, found_start, found_stop, dstart, dstop ) )

            ## save results
            p.mMapPeptide2Genome = map_peptide2genome
            p.mAlignmentString = Genomics.Alignment2String( map_peptide2genome )
            p.mSbjctGenomeFrom -= dstart
            p.mSbjctGenomeTo += dstop
            ## integer division: extensions are counted in codons here
            p.mSbjctFrom += dstart // 3
            p.mSbjctTo += dstart // 3 + dstop // 3

            if dstart or dstop:
                if dstart:
                    left_extensions.append( dstart )
                if dstop:
                    right_extensions.append( dstop )
                nseqs_extended += 1

            ## update genomic sequence because borders might have changed.
            genomic_sequence = fasta.getSequence( p.mSbjctToken,
                                                  p.mSbjctStrand,
                                                  p.mSbjctGenomeFrom,
                                                  p.mSbjctGenomeTo ).upper()

        ########################################################################
        ## Fill introns
        if options.fill_introns:

            has_filled = False
            exons = Exons.Alignment2Exons( p.mMapPeptide2Genome,
                                           query_from = 0,
                                           sbjct_from = 0 )

            new_exons = []
            last_e = exons[0]
            nintron = 0

            for e in exons[1:]:

                nintron += 1
                lintron = e.mGenomeFrom - last_e.mGenomeTo

                if lintron > options.fill_introns or lintron % 3 != 0:
                    E.debug( "prediction %s: intron %i of size %i discarded." % \
                             (p.mPredictionId, nintron, lintron ) )
                    new_exons.append(last_e)
                    last_e = e
                    continue

                ## get sequence, include also residues from split codons
                ## when checking for stop codons.
                if e.mAlignment[0][0] == "S":
                    offset_left = last_e.mAlignment[-1][2]
                    offset_right = e.mAlignment[0][2]
                else:
                    offset_left, offset_right = 0, 0

                sequence = genomic_sequence[last_e.mGenomeTo - offset_left:e.mGenomeFrom+offset_right]

                ## check for splice sites
                left_signal = any( sequence[offset_left:offset_left+len(signal)] == signal
                                   for signal in options.left_splice_signals )

                ## The end of the intron is computed as an absolute
                ## position: a slice ending at ``-offset_right`` is empty
                ## whenever offset_right is 0.
                intron_end = len(sequence) - offset_right
                right_signal = any( sequence[max(0, intron_end - len(signal)):intron_end] == signal
                                    for signal in options.right_splice_signals )

                nstops, ngaps = 0, 0
                for x in range(0, len(sequence), 3):
                    codon = sequence[x:x+3]
                    if codon in options.stop_codons:
                        nstops += 1
                    if "N" in codon.upper():
                        ngaps += 1

                E.debug( "prediction %s: intron %i of size %i (%i-%i) (%s:%s:%i:%i): stops=%i, gaps=%i, signals=%s,%s." % \
                         (p.mPredictionId, nintron, lintron,
                          offset_left, offset_right,
                          p.mSbjctToken, p.mSbjctStrand,
                          p.mSbjctGenomeFrom + last_e.mGenomeTo,
                          p.mSbjctGenomeFrom + e.mGenomeFrom,
                          nstops, ngaps,
                          left_signal, right_signal ) )

                if nstops + ngaps > options.introns_max_stops:
                    new_exons.append(last_e)
                    last_e = e
                    continue

                E.info( "prediction %s: filling intron %i of size %i: stops=%i, gaps=%i, signals=%s,%s" % \
                        (p.mPredictionId, nintron, lintron,
                         nstops, ngaps,
                         left_signal, right_signal))

                ## the merged exon replaces both flanking exons
                e.Merge( last_e )
                has_filled = True
                nfilled += 1
                last_e = e

                if options.output_format == "filled-introns":
                    options.stdout.write( "\t".join( map(str, ( p.mPredictionId,
                                                                nintron,
                                                                Genomics.TranslateDNA2Protein( sequence ),
                                                                sequence ) ) ) + "\n" )

                filled_introns.append(lintron)
                p.mNIntrons -= 1

            new_exons.append(last_e)

            if has_filled:
                nseqs_filled += 1

            Exons.UpdatePeptideCoordinates( new_exons )

            p.mMapPeptide2Genome = Exons.Exons2Alignment( new_exons )
            p.mAlignmentString = Genomics.Alignment2String( p.mMapPeptide2Genome )

        ## build translated sequence
        p.mMapPeptide2Translation, p.mTranslation = Genomics.Alignment2PeptideAlignment( \
            p.mMapPeptide2Genome, p.mQueryFrom, 0, genomic_sequence )

        ## output info
        if options.output_format == "predictions":
            options.stdout.write( str(p) + "\n" )
        elif options.output_format == "extensions":
            if found_start:
                found_start = 1
            if found_stop:
                found_stop = 1
            options.stdout.write( "\t".join( map(str, ( p.mPredictionId,
                                                        found_start, found_stop,
                                                        dstart, dstop,
                                                        p.mTranslation,
                                                        p.mSbjctGenomeFrom, p.mSbjctGenomeTo,
                                                        p.mAlignmentString ))) + "\n" )

        noutput += 1
        options.stdout.flush()

    E.info("stats  : %s" % "\t".join(Stats.DistributionalParameters().getHeaders() ))
    E.info("left   : %s" % str(Stats.DistributionalParameters(left_extensions)) )
    E.info("right  : %s" % str(Stats.DistributionalParameters(right_extensions)) )
    E.info("introns: %s" % str(Stats.DistributionalParameters(filled_introns)) )
    E.info("ninput=%i, noutput=%i, nextended=%i, nfilled=%i, nexons_filled=%i" % (\
        ninput, noutput, nseqs_extended, nseqs_filled, nfilled))

    ## the summary file only ever receives its header here; close it so
    ## that the handle is not leaked.
    if outfile_summary is not None:
        outfile_summary.close()

    E.Stop()