def runRegexMotifSearch(infiles, outfile):
    '''Run a regular expression motif search on two sequence sets.

    Thirty patterns are searched: the half-site ``[AG]G[GT]T[CG]A``
    followed by a spacer of 0 to 14 arbitrary characters and then either
    the same half-site again (labelled ``DR<n>``) or the reverse
    half-site ``T[GC]A[CA]C[TC]`` (labelled ``ER<n>``), where ``<n>`` is
    the spacer length.  Matching is case-insensitive.

    Arguments
    ---------
    infiles : tuple
        ``(controlfile, dbfile)`` - filenames of the control sequences
        and of the sequences of interest.
    outfile : string
        Filename of the tab-separated output table.  One row is written
        per motif with motif counts, per-sequence counts and percentages
        for both sets, and the fold difference between the sets.

    Raises
    ------
    ValueError
        If `controlfile` does not exist.
    '''
    motif = "[AG]G[GT]T[CG]A"
    reverse_motif = "T[GC]A[CA]C[TC]"

    controlfile, dbfile = infiles
    if not os.path.exists(controlfile):
        raise ValueError(
            "control file %s for %s does not exist" % (controlfile, dbfile))

    motifs = []
    for x in range(0, 15):
        motifs.append(
            ("DR%i" % x,
             re.compile(motif + "." * x + motif, re.IGNORECASE)))
    for x in range(0, 15):
        motifs.append(
            ("ER%i" % x,
             re.compile(motif + "." * x + reverse_motif, re.IGNORECASE)))

    # close the input files as soon as they have been scanned
    with iotools.open_file(dbfile, "r") as inf:
        db_positions = Motifs.countMotifs(inf, motifs)
    with iotools.open_file(controlfile, "r") as inf:
        control_positions = Motifs.countMotifs(inf, motifs)

    db_counts = Motifs.getCounts(db_positions)
    control_counts = Motifs.getCounts(control_positions)

    # Both per-sequence columns must come from the same function. The
    # control set previously used getCounts, so the seq_control,
    # seq_control_percent and fold columns were not comparable to the
    # db columns.
    db_seqcounts = Motifs.getOccurances(db_positions)
    control_seqcounts = Motifs.getOccurances(control_positions)

    ndb, ncontrol = len(db_positions), len(control_positions)

    with iotools.open_file(outfile, "w") as outf:
        outf.write(
            "motif\tmotifs_db\tmotifs_control\tseq_db\tseq_db_percent\tseq_control\tseq_control_percent\tfold\n"
        )

        for name, pattern in motifs:
            try:
                fold = float(db_seqcounts[name]) * \
                    ncontrol / (ndb * control_seqcounts[name])
            except ZeroDivisionError:
                # no db sequences or no control sequence with this motif
                fold = 0

            outf.write(
                "%s\t%i\t%i\t%i\t%s\t%i\t%s\t%5.2f\n" %
                (name,
                 db_counts[name],
                 control_counts[name],
                 db_seqcounts[name],
                 iotools.pretty_percent(db_seqcounts[name], ndb),
                 control_seqcounts[name],
                 iotools.pretty_percent(control_seqcounts[name], ncontrol),
                 fold))
def __str__(self):
    """Return all counters as one tab-separated line.

    The line holds the 18 gene/exon/base counters (total, overlapping
    and unique, each for set 1 and set 2), followed by the overlapping
    and unique counters expressed as a percentage of the corresponding
    total via ``iotools.pretty_percent``.
    """
    counts = (
        self.mGenes1, self.mGenes2,
        self.mGenesOverlapping1, self.mGenesOverlapping2,
        self.mGenesUnique1, self.mGenesUnique2,
        self.mExons1, self.mExons2,
        self.mExonsOverlapping1, self.mExonsOverlapping2,
        self.mExonsUnique1, self.mExonsUnique2,
        self.mBases1, self.mBases2,
        self.mBasesOverlapping1, self.mBasesOverlapping2,
        self.mBasesUnique1, self.mBasesUnique2,
    )

    # (numerator, denominator) pairs for the percentage columns
    ratios = (
        (self.mGenesOverlapping1, self.mGenes1),
        (self.mGenesOverlapping2, self.mGenes2),
        (self.mGenesUnique1, self.mGenes1),
        (self.mGenesUnique2, self.mGenes2),
        (self.mExonsOverlapping1, self.mExons1),
        (self.mExonsOverlapping2, self.mExons2),
        (self.mExonsUnique1, self.mExons1),
        (self.mExonsUnique2, self.mExons2),
        (self.mBasesOverlapping1, self.mBases1),
        (self.mBasesOverlapping2, self.mBases2),
        (self.mBasesUnique1, self.mBases1),
        (self.mBasesUnique2, self.mBases2),
    )

    fields = [str(value) for value in counts]
    for part, total in ratios:
        fields.append(iotools.pretty_percent(part, total))
    return "\t".join(fields)
def main(argv=None):
    """Summarize a delimited table read from stdin.

    Without ``--method`` the rows produced by ``compute_table_summary``
    are written to stdout as ``metric/count/percent/info`` columns.
    With ``--method=column-describe`` or ``--method=row-describe`` the
    output of ``pandas.DataFrame.describe`` for the columns or for the
    rows (via the transposed table) is written to a separate output
    file opened with ``E.open_output_file``.

    Arguments
    ---------
    argv : list, optional
        Command line arguments, passed on to ``E.start``.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-d", "--delimiter", dest="delimiter", type="string",
        help="delimiter to separate columns [%default]")

    parser.add_option(
        "-m", "--method", dest="methods", type="choice", action="append",
        choices=["row-describe", "column-describe"],
        help="additional methods to apply [%default]")

    parser.set_defaults(
        delimiter="\t",
        methods=[],
    )

    (options, args) = E.start(parser, argv=argv, add_output_options=True)

    if not options.methods:
        options.methods = ["summary"]

    # the separator must be given by keyword: recent pandas versions
    # only accept the file argument positionally
    table = pandas.read_csv(options.stdin, sep=options.delimiter)

    options.stdout.write("metric\tcount\tpercent\tinfo\n")

    for method in options.methods:
        label = re.sub("-", "_", method)

        if method == "summary":
            for category, count, denominator, info in compute_table_summary(
                    table):
                options.stdout.write("\t".join(
                    map(str, (category,
                              count,
                              iotools.pretty_percent(
                                  count, denominator, na=""),
                              info))) + "\n")
            continue

        if method == "column-describe":
            df = table.describe().T.stack()
        elif method == "row-describe":
            df = table.T.describe().stack()
        else:
            continue

        with E.open_output_file(label) as outf:
            outf.write("label\tcategory\tvalue\n")
            # header=False: the header line is written explicitly above
            # and must not be followed by a second, pandas-generated one
            df.to_csv(outf, sep="\t", header=False)

    E.stop()
def main(argv=None):
    """Run a GO enrichment analysis for one or more gene lists.

    The function works through the following steps:

    1. Parse the command line.  ``--go2goslim`` and ``--filename-dump``
       are stand-alone modes that exit once they have finished.
    2. Read gene to GO associations from ``--filename-input`` and,
       optionally, a gene name map and an ontology in OBO format.
    3. Read the foreground gene lists and the optional background.
    4. For each ontology and each gene list: reconcile foreground and
       background, optionally map to GO slims, run ``GO.AnalyseGO``,
       optionally compute FDRs, and write the ``foreground``,
       ``background``, ``overall``, ``results``, ``parameters`` and
       ``withgenes`` sections via ``GO.getFileName``.
    5. Write one summary row per gene list and ontology to stdout.

    With ``--get-genes`` the genes annotated with the given GO id are
    listed instead and the script exits.

    Arguments
    ---------
    argv : list, optional
        Command line arguments, passed on to ``E.start``.

    Raises
    ------
    AssertionError
        With ``--strict`` if foreground genes are missing from the
        background.
    """
    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    parser.add_option(
        "-s", "--species", dest="species", type="string",
        help="species to use [default=%default].")

    parser.add_option(
        "-i", "--slims", dest="filename_slims", type="string",
        help="filename with GO SLIM categories "
        "[default=%default].")

    parser.add_option(
        "-g", "--genes-tsv-file", dest="filename_genes", type="string",
        help="filename with genes to analyse "
        "[default=%default].")

    parser.add_option(
        "-b", "--background-tsv-file", dest="filename_background",
        type="string",
        help="filename with background genes to analyse "
        "[default=%default].")

    parser.add_option(
        "-m", "--min-counts", dest="minimum_counts", type="int",
        help="minimum count - ignore all categories that have "
        "fewer than # number of genes"
        " [default=%default].")

    parser.add_option(
        "-o", "--sort-order", dest="sort_order", type="choice",
        choices=("fdr", "pvalue", "ratio"),
        help="output sort order [default=%default].")

    parser.add_option(
        "--ontology", dest="ontology", type="string", action="append",
        help="go ontologies to analyze. Ontologies are tested "
        "separately [default=%default].")

    parser.add_option(
        "-t", "--threshold", dest="threshold", type="float",
        help="significance threshold [>1.0 = all ]. If --fdr is set, this "
        "refers to the fdr, otherwise it is a cutoff for p-values.")

    parser.add_option(
        "--filename-dump", dest="filename_dump", type="string",
        help="dump GO category assignments into a flatfile "
        "[default=%default].")

    parser.add_option(
        "--gene2name-map-tsv-file", dest="filename_gene2name",
        type="string",
        help="optional filename mapping gene identifiers to gene names "
        "[default=%default].")

    parser.add_option(
        "--filename-ontology", dest="filename_ontology", type="string",
        help="filename with ontology in OBO format [default=%default].")

    parser.add_option(
        "--filename-input", dest="filename_input", type="string",
        help="read GO category assignments from a flatfile "
        "[default=%default].")

    parser.add_option(
        "--sample-size", dest="sample", type="int",
        help="do sampling (with # samples) [default=%default].")

    parser.add_option(
        "--filename-output-pattern", "--output-filename-pattern",
        dest="output_filename_pattern", type="string",
        help="pattern with output filename pattern "
        "(should contain: %(go)s and %(section)s ) [default=%default]")

    parser.add_option(
        "--fdr", dest="fdr", action="store_true",
        help="calculate and filter by FDR default=%default].")

    parser.add_option(
        "--go2goslim", dest="go2goslim", action="store_true",
        help="convert go assignments in STDIN to goslim assignments and "
        "write to STDOUT [default=%default].")

    parser.add_option(
        "--gene-pattern", dest="gene_pattern", type="string",
        help="pattern to transform identifiers to GO gene names "
        "[default=%default].")

    parser.add_option(
        "--filename-map-slims", dest="filename_map_slims", type="string",
        help="write mapping between GO categories and GOSlims "
        "[default=%default].")

    parser.add_option(
        "--get-genes", dest="get_genes", type="string",
        help="list all genes in the with a certain GOID [default=%default].")

    parser.add_option(
        "--strict", dest="strict", action="store_true",
        help="require all genes in foreground to be part of background. "
        "If not set, genes in foreground will be added to the background "
        "[default=%default].")

    parser.add_option(
        "-q", "--fdr-method", dest="qvalue_method", type="choice",
        choices=("empirical", "storey", "BH"),
        help="method to perform multiple testing correction by controlling "
        "the fdr [default=%default].")

    parser.add_option(
        "--pairwise", dest="compute_pairwise", action="store_true",
        help="compute pairwise enrichment for multiple gene lists. "
        "[default=%default].")

    # parser.add_option( "--fdr-lambda", dest="qvalue_lambda", type="float",
    #                   help="fdr computation: lambda [default=%default]."  )

    # parser.add_option( "--qvalue-pi0-method", dest="qvalue_pi0_method", type="choice",
    #                    choices = ("smoother", "bootstrap" ),
    # help="fdr computation: method for estimating pi0 [default=%default]."  )

    parser.set_defaults(species=None,
                        filename_genes="-",
                        filename_background=None,
                        filename_slims=None,
                        minimum_counts=0,
                        ontology=[],
                        filename_dump=None,
                        sample=0,
                        fdr=False,
                        output_filename_pattern=None,
                        threshold=0.05,
                        filename_map_slims=None,
                        gene_pattern=None,
                        sort_order="ratio",
                        get_genes=None,
                        strict=False,
                        qvalue_method="empirical",
                        pairs_min_observed_counts=3,
                        compute_pairwise=False,
                        filename_gene2name=None
                        )

    # forward argv so that the script honours arguments given by a caller
    (options, args) = E.start(parser, argv=argv, add_database_options=True)

    if options.go2goslim:
        GO.convertGo2Goslim(options)
        E.stop()
        sys.exit(0)

    if options.fdr and options.sample == 0:
        E.warn("fdr will be computed without sampling")

    #############################################################
    # dump GO
    if options.filename_dump:
        # set default orthologies to GO
        if not options.ontology:
            options.ontology = [
                "biol_process", "mol_function", "cell_location"]

        E.info("dumping GO categories to %s" % (options.filename_dump))

        dbhandle = database.connect(url=options.database_url)

        outfile = iotools.open_file(options.filename_dump, "w",
                                    create_dir=True)
        GO.DumpGOFromDatabase(outfile, dbhandle, options)
        outfile.close()
        E.stop()
        sys.exit(0)

    #############################################################
    # read GO categories from file

    # Bound up-front so that the output section below does not fail
    # with a NameError when no association file is given.
    # NOTE(review): assumes GO.outputResults accepts gene2name=None -
    # confirm against its signature.
    gene2name = None

    if options.filename_input:
        E.info("reading association of categories and genes from %s" %
               (options.filename_input))
        infile = iotools.open_file(options.filename_input)
        gene2gos, go2infos = GO.ReadGene2GOFromFile(infile)
        infile.close()

        if options.filename_gene2name:
            E.info("reading gene identifier to gene name mapping from %s" %
                   options.filename_gene2name)
            infile = iotools.open_file(options.filename_gene2name)
            gene2name = iotools.read_map(infile, has_header=True)
            infile.close()
            E.info("read %i gene names for %i gene identifiers" %
                   (len(set(gene2name.values())),
                    len(gene2name)))
        else:
            # use identity mapping
            gene2name = dict([(x, x) for x in list(gene2gos.keys())])

    #############################################################
    # read GO ontology from file

    # Bound up-front: GO.MapGO2Slims below receives the ontology even
    # when --filename-ontology has not been given.
    ontology = None

    if options.filename_ontology:
        E.info("reading ontology from %s" % (options.filename_ontology))

        infile = iotools.open_file(options.filename_ontology)
        ontology = GO.readOntology(infile)
        infile.close()

        def _g():
            return collections.defaultdict(GO.GOInfo)
        go2infos = collections.defaultdict(_g)

        # substitute go2infos
        for go in list(ontology.values()):
            go2infos[go.mNameSpace][go.mId] = GO.GOInfo(
                go.mId,
                go_type=go.mNameSpace,
                description=go.mName)

    #############################################################
    # get foreground gene list
    input_foreground, genelists = GO.ReadGeneLists(
        options.filename_genes,
        gene_pattern=options.gene_pattern)

    E.info("read %i genes for forground in %i gene lists" %
           (len(input_foreground), len(genelists)))

    #############################################################
    # get background
    if options.filename_background:
        # nick - bug fix: background is the first tuple element from
        # ReadGeneLists
        input_background = GO.ReadGeneLists(
            options.filename_background,
            gene_pattern=options.gene_pattern)[0]
        E.info("read %i genes for background" % len(input_background))
    else:
        input_background = None

    #############################################################
    # sort out which ontologies to test
    if not options.ontology:
        if options.filename_input:
            options.ontology = list(gene2gos.keys())

    E.info("found %i ontologies: %s" %
           (len(options.ontology), options.ontology))

    summary = []
    summary.append("\t".join((
        "genelist",
        "ontology",
        "significant",
        "threshold",
        "ngenes",
        "ncategories",
        "nmaps",
        "nforegound",
        "nforeground_mapped",
        "nbackground",
        "nbackground_mapped",
        "nsample_counts",
        "nbackground_counts",
        "psample_assignments",
        "pbackground_assignments",
        "messages")) + "\n")

    #############################################################
    # get go categories for genes
    for test_ontology in sorted(options.ontology):

        # store results for aggregate output of multiple gene lists
        all_results = []
        all_significant_results = []
        all_genelists_with_results = []

        E.info("working on ontology %s" % test_ontology)

        #############################################################
        # get/read association of GO categories to genes
        if options.filename_input:
            gene2go, go2info = gene2gos[test_ontology], go2infos[test_ontology]
        else:
            E.info("reading data from database ...")

            # Connect in the same way as the dump branch above. The
            # handle was previously used here without ever being bound.
            # NOTE(review): options.database is not set anywhere in
            # this function - confirm it is provided by the database
            # options added in E.start.
            dbhandle = database.connect(url=options.database_url)
            gene2go, go2info = GO.ReadGene2GOFromDatabase(
                dbhandle,
                test_ontology,
                options.database, options.species)

            E.info("finished")

        if len(go2info) == 0:
            E.warn(
                "could not find information for terms - "
                "could be mismatch between ontologies")

        ngenes, ncategories, nmaps, counts_per_category = GO.CountGO(gene2go)
        E.info("assignments found: %i genes mapped to %i categories "
               "(%i maps)" %
               (ngenes, ncategories, nmaps))

        if options.minimum_counts > 0:
            to_remove = set(
                [x for x, y in counts_per_category.items()
                 if y < options.minimum_counts])
            E.info("removing %i categories with less than %i genes" %
                   (len(to_remove), options.minimum_counts))
            GO.removeCategories(gene2go, to_remove)

            ngenes, ncategories, nmaps, counts_per_category = \
                GO.CountGO(gene2go)
            E.info("assignments after filtering: %i genes mapped "
                   "to %i categories (%i maps)" % (
                       ngenes, ncategories, nmaps))

        for genelist_name, foreground in sorted(genelists.items()):

            msgs = []
            E.info("processing %s with %i genes" %
                   (genelist_name, len(foreground)))

            ##################################################################
            # build background - reconcile with foreground
            ##################################################################
            if input_background is None:
                background = list(gene2go.keys())
            else:
                background = list(input_background)

            # nick - bug-fix backgorund included the foreground in a tuple.
            # background is the first tuple element
            missing = foreground.difference(set(background))

            if options.strict:
                assert len(missing) == 0, \
                    "%i genes in foreground but not in background: %s" % (
                        len(missing), str(missing))
            else:
                if len(missing) != 0:
                    E.warn("%i genes in foreground that are not in "
                           "background - added to background of %i" %
                           (len(missing), len(background)))

                background.extend(missing)

            E.info("(unfiltered) foreground=%i, background=%i" %
                   (len(foreground), len(background)))

            # sort foreground and background, important for reproducibility
            # under random seed
            foreground = sorted(foreground)
            background = sorted(background)

            #############################################################
            # sanity checks:
            # are all of the foreground genes in the dataset
            # missing = set(genes).difference( set(gene2go.keys()) )
            # assert len(missing) == 0, "%i genes in foreground set without GO annotation: %s" % (len(missing), str(missing))

            #############################################################
            # read GO slims and map GO categories to GO slim categories
            # NOTE(review): gene2go is re-assigned here for every gene
            # list, so later gene lists are mapped from an already
            # mapped gene2go - confirm this is intended.
            if options.filename_slims:
                with iotools.open_file(options.filename_slims, "r") as inf:
                    go_slims = GO.GetGOSlims(inf)

                if options.loglevel >= 1:
                    v = set()
                    for x in list(go_slims.values()):
                        for xx in x:
                            v.add(xx)
                    options.stdlog.write(
                        "# read go slims from %s: go=%i, slim=%i\n" %
                        (options.filename_slims,
                         len(go_slims),
                         len(v)))

                if options.filename_map_slims:
                    if options.filename_map_slims == "-":
                        outfile = options.stdout
                    else:
                        outfile = iotools.open_file(
                            options.filename_map_slims, "w")

                    outfile.write("GO\tGOSlim\n")
                    for go, go_slim in sorted(list(go_slims.items())):
                        outfile.write("%s\t%s\n" % (go, go_slim))

                    if outfile != options.stdout:
                        outfile.close()

                gene2go = GO.MapGO2Slims(gene2go, go_slims, ontology=ontology)

                if options.loglevel >= 1:
                    ngenes, ncategories, nmaps, counts_per_category = \
                        GO.CountGO(gene2go)
                    options.stdlog.write(
                        "# after go slim filtering: %i genes mapped to "
                        "%i categories (%i maps)\n" % (
                            ngenes, ncategories, nmaps))

            #############################################################
            # Just dump out the gene list
            if options.get_genes:
                fg, bg, ng = [], [], []

                # sets for constant-time membership tests
                foreground_set = set(foreground)
                background_set = set(background)

                for gene, vv in list(gene2go.items()):
                    for v in vv:
                        if v.mGOId == options.get_genes:
                            # tested against the foreground; the name
                            # used here before was never defined
                            if gene in foreground_set:
                                fg.append(gene)
                            elif gene in background_set:
                                bg.append(gene)
                            else:
                                ng.append(gene)

                # skip to next GO class
                # NOTE(review): a category with foreground genes only
                # is skipped as well - confirm this is intended.
                if not (bg or ng):
                    continue

                options.stdout.write(
                    "# genes in GO category %s\n" % options.get_genes)
                options.stdout.write("gene\tset\n")
                # rows are written in the column order of the header
                for x in sorted(fg):
                    options.stdout.write("%s\t%s\n" % (x, "fg"))
                for x in sorted(bg):
                    options.stdout.write("%s\t%s\n" % (x, "bg"))
                for x in sorted(ng):
                    options.stdout.write("%s\t%s\n" % (x, "ng"))

                E.info("nfg=%i, nbg=%i, nng=%i" % (len(fg), len(bg), len(ng)))

                E.stop()
                sys.exit(0)

            #############################################################
            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='foreground',
                                     set=genelist_name)

            outfile.write("gene_id\n%s\n" % ("\n".join(sorted(foreground))))
            if options.output_filename_pattern:
                outfile.close()

            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='background',
                                     set=genelist_name)

            # Jethro bug fix - see section 'build background' for assignment
            outfile.write("gene_id\n%s\n" % ("\n".join(sorted(background))))
            if options.output_filename_pattern:
                outfile.close()

            #############################################################
            # do the analysis
            go_results = GO.AnalyseGO(gene2go, foreground, background)

            if len(go_results.mSampleGenes) == 0:
                E.warn("%s: no genes with GO categories - analysis aborted" %
                       genelist_name)
                continue

            pairs = list(go_results.mResults.items())

            #############################################################
            # calculate fdr for each hypothesis
            if options.fdr:
                fdrs, samples, method = GO.computeFDRs(go_results,
                                                       foreground,
                                                       background,
                                                       options,
                                                       test_ontology,
                                                       gene2go,
                                                       go2info)
                for go_id, result in pairs:
                    result.mQValue = fdrs[go_id][0]
            else:
                fdrs, samples, method = {}, {}, None

            msgs.append("fdr=%s" % method)

            if options.sort_order == "fdr":
                pairs.sort(key=lambda x: x[1].mQValue)
            elif options.sort_order == "ratio":
                pairs.sort(key=lambda x: x[1].mRatio)
            elif options.sort_order == "pvalue":
                pairs.sort(key=lambda x: x[1].mPValue)

            #############################################################
            # output the full result
            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='overall',
                                     set=genelist_name)

            GO.outputResults(
                outfile, pairs, go2info, options, fdrs=fdrs, samples=samples)

            if options.output_filename_pattern:
                outfile.close()

            #############################################################
            # filter significant results and output
            filtered_pairs = GO.selectSignificantResults(pairs, fdrs, options)

            nselected = len(filtered_pairs)
            nselected_up = len([x for x in filtered_pairs if x[1].mRatio > 1])
            nselected_down = len(
                [x for x in filtered_pairs if x[1].mRatio < 1])

            assert nselected_up + nselected_down == nselected

            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='results',
                                     set=genelist_name)

            GO.outputResults(outfile,
                             filtered_pairs,
                             go2info,
                             options,
                             fdrs=fdrs,
                             samples=samples)

            if options.output_filename_pattern:
                outfile.close()

            #############################################################
            # save results for multi-gene-list analysis
            all_results.append(pairs)
            all_significant_results.append(filtered_pairs)
            all_genelists_with_results.append(genelist_name)

            #############################################################
            # output parameters
            ngenes, ncategories, nmaps, counts_per_category = \
                GO.CountGO(gene2go)

            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='parameters',
                                     set=genelist_name)

            nbackground = len(background)
            if nbackground == 0:
                nbackground = len(go_results.mBackgroundGenes)

            outfile.write(
                "# input go mappings for gene list '%s' and category '%s'\n" %
                (genelist_name, test_ontology))
            outfile.write("parameter\tvalue\tdescription\n")
            outfile.write("mapped_genes\t%i\tmapped genes\n" % ngenes)
            outfile.write(
                "mapped_categories\t%i\tmapped categories\n" % ncategories)
            outfile.write("mappings\t%i\tmappings\n" % nmaps)
            outfile.write("genes_in_fg\t%i\tgenes in foreground\n" %
                          len(foreground))
            outfile.write(
                "genes_in_fg_with_assignment\t%i\tgenes in foreground with GO assignments\n" %
                (len(go_results.mSampleGenes)))
            outfile.write(
                "genes_in_bg\t%i\tinput background\n" % nbackground)
            outfile.write(
                "genes_in_bg_with_assignment\t%i\tgenes in background with GO assignments\n" % (
                    len(go_results.mBackgroundGenes)))
            outfile.write(
                "associations_in_fg\t%i\tassociations in sample\n" %
                go_results.mSampleCountsTotal)
            outfile.write(
                "associations_in_bg\t%i\tassociations in background\n" %
                go_results.mBackgroundCountsTotal)
            outfile.write(
                "percent_genes_in_fg_with_association\t%s\tpercent genes in sample with GO assignments\n" % (
                    iotools.pretty_percent(len(go_results.mSampleGenes),
                                           len(foreground), "%5.2f")))
            outfile.write(
                "percent_genes_in_bg_with_associations\t%s\tpercent genes background with GO assignments\n" % (
                    iotools.pretty_percent(len(go_results.mBackgroundGenes),
                                           nbackground, "%5.2f")))
            outfile.write(
                "significant\t%i\tsignificant results reported\n" % nselected)
            outfile.write(
                "significant_up\t%i\tsignificant up-regulated results reported\n" %
                nselected_up)
            # description corrected: this row counts results with ratio < 1
            outfile.write(
                "significant_down\t%i\tsignificant down-regulated results reported\n" %
                nselected_down)
            outfile.write(
                "threshold\t%6.4f\tsignificance threshold\n" %
                options.threshold)

            if options.output_filename_pattern:
                outfile.close()

            summary.append("\t".join(map(str, (
                genelist_name,
                test_ontology,
                nselected,
                options.threshold,
                ngenes,
                ncategories,
                nmaps,
                len(foreground),
                len(go_results.mSampleGenes),
                nbackground,
                len(go_results.mBackgroundGenes),
                go_results.mSampleCountsTotal,
                go_results.mBackgroundCountsTotal,
                iotools.pretty_percent(
                    len(go_results.mSampleGenes), len(foreground), "%5.2f"),
                iotools.pretty_percent(
                    len(go_results.mBackgroundGenes), nbackground, "%5.2f"),
                ",".join(msgs)))) + "\n")

            #############################################################
            # output the fg patterns
            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='withgenes',
                                     set=genelist_name)

            GO.outputResults(outfile, pairs, go2info, options,
                             fdrs=fdrs,
                             samples=samples,
                             gene2go=gene2go,
                             foreground=foreground,
                             gene2name=gene2name)

            if options.output_filename_pattern:
                outfile.close()

        if len(genelists) > 1:

            ###################################################################
            # output various summary files
            # significant results
            GO.outputMultipleGeneListResults(all_significant_results,
                                             all_genelists_with_results,
                                             test_ontology,
                                             go2info,
                                             options,
                                             section='significant')

            # all results
            GO.outputMultipleGeneListResults(all_results,
                                             all_genelists_with_results,
                                             test_ontology,
                                             go2info,
                                             options,
                                             section='all')

            if options.compute_pairwise:
                GO.pairwiseGOEnrichment(all_results,
                                        all_genelists_with_results,
                                        test_ontology,
                                        go2info,
                                        options)

    outfile_summary = options.stdout
    outfile_summary.write("".join(summary))

    E.stop()
def _write(outs, text, numerator, denominator, base):
    """Write one tab-separated row with a count and its percentage.

    The row consists of `text`, `numerator`, the percentage of
    `numerator` in `denominator` as formatted by
    ``iotools.pretty_percent``, and `base`.
    """
    row = (text,
           numerator,
           iotools.pretty_percent(numerator, denominator),
           base)
    outs.write('%s\t%i\t%s\t%s\n' % row)
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Two chain files are validated, converted to pairs with
    ``buildPairs`` and compared in both directions with
    ``compareChains``.  For every (contig1, contig2, strand) key a row
    with the mapped/identical/different/unique counts and percentages
    of both directions is written to stdout, followed by a row of
    totals.  Optionally the individual positions are output through
    ``outputMismatches``.

    Returns 1 if one of the chain files fails validation.

    Raises
    ------
    ValueError
        If the command line does not contain exactly two filenames.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: chain2psl.py 2899 2010-04-13 14:37:37Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-m", "--output-mismatches", dest="output_mismatches",
        action="store_true",
        help="output mismatches [%default]")

    parser.add_option(
        "-a", "--output-matches", dest="output_matches",
        action="store_true",
        help="output matches [%default]")

    parser.add_option(
        "-u", "--output-unique", dest="output_unique",
        action="store_true",
        help="output unique positions [%default]")

    parser.add_option(
        "-r", "--restrict", dest="restrict", type="string",
        help="restrict analysis to a chromosome pair (chr1:chr1:+) [%default]")

    parser.set_defaults(
        output_mismatches=False,
        output_matches=False,
        output_unique=False,
        restrict=None
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError("expected two chain files")

    filename_chain1, filename_chain2 = args

    for number, filename in enumerate((filename_chain1, filename_chain2), 1):
        E.info("validating chain %i" % number)
        with iotools.open_file(filename) as infile:
            is_valid = validateChain(infile)
        if not is_valid:
            E.warn("validation failed - exiting")
            return 1

    E.info("building pairs for %s" % filename_chain1)
    with iotools.open_file(filename_chain1) as infile:
        pairs1 = buildPairs(infile)
    E.info("read %i pairs" % len(pairs1))

    E.info("building pairs for %s" % filename_chain2)
    with iotools.open_file(filename_chain2) as infile:
        pairs2 = buildPairs(infile)
    E.info("read %i pairs" % len(pairs2))

    if options.restrict:
        restrict = tuple(options.restrict.split(":"))
        pairs1 = {restrict: pairs1[restrict]}
        pairs2 = {restrict: pairs2[restrict]}

    E.info("comparing 1 -> 2")
    comparison1 = compareChains(pairs1, pairs2)
    E.info("comparing 2 -> 1")
    comparison2 = compareChains(pairs2, pairs1)

    all_keys = sorted(
        list(set(list(comparison1.keys()) + list(comparison2.keys()))))

    outfile = options.stdout
    headers = ("mapped", "identical", "different", "unique")
    outfile.write("contig1\tcontig2\tstrand\t%s\t%s\t%s\t%s\n" % (
        "\t".join(["%s1" % x for x in headers]),
        "\t".join(["p%s1" % x for x in headers]),
        "\t".join(["%s2" % x for x in headers]),
        "\t".join(["p%s2" % x for x in headers])))

    # Placeholder for a key that is absent from one comparison: exactly
    # eight zero columns (four counts, four percentages).  The columns
    # were previously written with a doubled tab in between, which
    # added an empty column and shifted the remainder of the row.
    absent = "\t" + "\t".join(["0"] * (2 * len(headers)))

    def _columns(c):
        """Format the count and percent columns of one comparison."""
        return "\t%i\t%i\t%i\t%i\t" % (
            c.total, c.same, c.different, c.unique) + \
            "\t".join([iotools.pretty_percent(x, c.total) for x in c])

    totals = E.Counter()

    for key in all_keys:
        outfile.write("%s\t%s\t%s" % key)

        if key in comparison1:
            c = comparison1[key]
            outfile.write(_columns(c))
            totals.total1 += c.total
            totals.same1 += c.same
            totals.different1 += c.different
            totals.unique1 += c.unique
        else:
            outfile.write(absent)

        if key in comparison2:
            c = comparison2[key]
            outfile.write(_columns(c))
            totals.same2 += c.same
            totals.total2 += c.total
            totals.different2 += c.different
            totals.unique2 += c.unique
        else:
            outfile.write(absent)

        outfile.write("\n")

    outfile.write("total\ttotal\t.\t")
    outfile.write("\t".join(map(str, (
        totals.total1,
        totals.same1,
        totals.different1,
        totals.unique1,
        iotools.pretty_percent(totals.total1, totals.total1),
        iotools.pretty_percent(totals.same1, totals.total1),
        iotools.pretty_percent(totals.different1, totals.total1),
        iotools.pretty_percent(totals.unique1, totals.total1),
        totals.total2,
        totals.same2,
        totals.different2,
        totals.unique2,
        iotools.pretty_percent(totals.total2, totals.total2),
        iotools.pretty_percent(totals.same2, totals.total2),
        iotools.pretty_percent(totals.different2, totals.total2),
        iotools.pretty_percent(totals.unique2, totals.total2),
    ))) + "\n")

    # output individual positions; --output-matches on its own requests
    # output as well, it used to be ignored unless combined with -m or -u
    if (options.output_mismatches or
            options.output_unique or
            options.output_matches):
        outputMismatches(pairs1, pairs2,
                         output_mismatches=options.output_mismatches,
                         output_unique=options.output_unique,
                         output_matches=options.output_matches,
                         )

    # write footer and output benchmark information.
    E.stop()