def alignIndels(all_alleles, colcounts, extend_by=0):
    '''Realign the indel columns of all alleles in place.

    Columns whose entry in *colcounts* is at most 1 are skipped. For every
    other column ``x`` the segments ``allele[x]`` of all alleles are
    multiply aligned and written back into the alleles. An allele whose
    realigned row is empty receives a run of gap characters instead.
    ``colcounts[x]`` is overwritten with the length of the longest
    realigned row.

    all_alleles -- mapping of sequence id to a collection of alleles; each
                   allele is indexable by column and supports item
                   assignment.
    colcounts   -- list with one entry per column; modified in place.
    extend_by   -- not used by this function.
    '''
    pairwise = alignlib_lite.py_makeAlignatorDPFull(
        alignlib_lite.py_ALIGNMENT_LOCAL, 0, 0)
    alignator = alignlib_lite.py_makeMultipleAlignatorSimple(pairwise)

    ids = all_alleles.keys()

    for column, count in enumerate(colcounts):
        if count <= 1:
            continue

        # Flatten once so that reading the segments and writing them back
        # walk the alleles in exactly the same order.
        alleles = [allele for sid in ids for allele in all_alleles[sid]]

        sequences = alignlib_lite.py_StringVector()
        for allele in alleles:
            sequences.append(allele[column])

        mali = alignlib_lite.py_makeMultAlignment()
        alignator.align(mali, sequences)

        # The plain format is parsed as one tab-separated row per sequence;
        # the aligned string is taken from the second field.
        formatted = str(
            alignlib_lite.py_MultAlignmentFormatPlain(mali, sequences))
        realigned = [row[:-1].split("\t")[1]
                     for row in formatted.split("\n")[:-1]]
        assert len(realigned) == len(sequences)

        width = max([len(segment) for segment in realigned])

        for index, allele in enumerate(alleles):
            segment = realigned[index]
            allele[column] = segment if segment else "-" * width

        colcounts[column] = width
def AlignExhaustive(seq_wobble, seq_cds, seq_peptide, map_p2c, options,
                    diag_width=2):
    """Align two sequences that are expected to be roughly the same.

    If *seq_wobble* is shorter than 10000 residues, a full dynamic
    programming matrix is used. Otherwise only residue pairs close to the
    diagonal and with a non-negative substitution score are collected as
    dots, and the alignment is computed over these dots to keep memory low.

    The alignment is stored in *map_p2c*; nothing is returned. As a side
    effect the default substitution matrix of alignlib_lite is replaced
    with a back-translation matrix.

    seq_wobble  -- row sequence (needs getLength() and asResidue()).
    seq_cds     -- column sequence (needs getLength() and asResidue()).
    seq_peptide -- not used by this function.
    map_p2c     -- alignment object that receives the result.
    options     -- object providing ``loglevel`` and ``stdlog``.
    diag_width  -- the value passed in is never used: the banded branch
                   recomputes the width from the length difference of the
                   two sequences. Kept for interface compatibility.
    """
    gop, gep = -1.0, -1.0

    matrix = alignlib_lite.py_makeSubstitutionMatrixBackTranslation(
        1, -10, 1, alignlib_lite.py_getDefaultEncoder())
    alignlib_lite.py_setDefaultSubstitutionMatrix(matrix)

    # The sequences are not modified here, so their lengths are loop
    # invariant and only need to be queried once.
    wobble_length = seq_wobble.getLength()

    if wobble_length < 10000:
        if options.loglevel >= 6:
            options.stdlog.write("# using full dynamic programing matrix.\n")
            options.stdlog.flush()
        # do not penalize gaps at the end, because sometimes the last codon
        # might be missing
        alignator = alignlib_lite.py_makeAlignatorDPFull(
            alignlib_lite.py_ALIGNMENT_GLOBAL, gop, gep, 1, 1)
    else:
        cds_length = seq_cds.getLength()
        diag_width = abs(wobble_length - cds_length) + 1
        if options.loglevel >= 6:
            options.stdlog.write(
                "# using dot alignment with diagonal %i\n" % diag_width)
            options.stdlog.flush()

        dots = alignlib_lite.py_makeAlignmentMatrixRow()

        for x in range(0, wobble_length):
            xr = seq_wobble.asResidue(x)
            # NOTE(review): the band is half-open, covering
            # [x - diag_width, x + diag_width) - confirm this is intended.
            for y in range(max(0, x - diag_width),
                           min(cds_length, x + diag_width)):
                s = matrix.getValue(xr, seq_cds.asResidue(y))
                if s >= 0:
                    dots.addPair(x, y, float(s))

        if options.loglevel >= 6:
            # Terminate the message with a newline so that subsequent log
            # lines do not get appended to it.
            options.stdlog.write(
                "# finished adding %i dots\n" % dots.getLength())
            options.stdlog.flush()

        alignator_dummy = alignlib_lite.py_makeAlignatorPrebuilt(dots)
        alignator = alignlib_lite.py_makeAlignatorDots(
            alignator_dummy, gop, gep)

    alignator.align(map_p2c, seq_wobble, seq_cds)
def AlignExhaustive(seq_wobble, seq_cds, seq_peptide, map_p2c, options,
                    diag_width=2):
    """Align two sequences that are expected to be roughly the same.

    If *seq_wobble* is shorter than 10000 residues, a full dynamic
    programming matrix is used. Otherwise only residue pairs close to the
    diagonal and with a non-negative substitution score are collected as
    dots, and the alignment is computed over these dots to keep memory low.

    The alignment is stored in *map_p2c*; nothing is returned. As a side
    effect the default substitution matrix of alignlib_lite is replaced
    with a back-translation matrix.

    seq_wobble  -- row sequence (needs getLength() and asResidue()).
    seq_cds     -- column sequence (needs getLength() and asResidue()).
    seq_peptide -- not used by this function.
    map_p2c     -- alignment object that receives the result.
    options     -- object providing ``loglevel`` and ``stdlog``.
    diag_width  -- the value passed in is never used: the banded branch
                   recomputes the width from the length difference of the
                   two sequences. Kept for interface compatibility.
    """
    gop, gep = -1.0, -1.0

    matrix = alignlib_lite.py_makeSubstitutionMatrixBackTranslation(
        1, -10, 1, alignlib_lite.py_getDefaultEncoder())
    alignlib_lite.py_setDefaultSubstitutionMatrix(matrix)

    # The sequences are not modified here, so their lengths are loop
    # invariant and only need to be queried once.
    wobble_length = seq_wobble.getLength()

    if wobble_length < 10000:
        if options.loglevel >= 6:
            options.stdlog.write("# using full dynamic programing matrix.\n")
            options.stdlog.flush()
        # do not penalize gaps at the end, because sometimes the last codon
        # might be missing
        alignator = alignlib_lite.py_makeAlignatorDPFull(
            alignlib_lite.py_ALIGNMENT_GLOBAL, gop, gep, 1, 1)
    else:
        cds_length = seq_cds.getLength()
        diag_width = abs(wobble_length - cds_length) + 1
        if options.loglevel >= 6:
            options.stdlog.write(
                "# using dot alignment with diagonal %i\n" % diag_width)
            options.stdlog.flush()

        dots = alignlib_lite.py_makeAlignmentMatrixRow()

        for x in range(0, wobble_length):
            xr = seq_wobble.asResidue(x)
            # NOTE(review): the band is half-open, covering
            # [x - diag_width, x + diag_width) - confirm this is intended.
            for y in range(max(0, x - diag_width),
                           min(cds_length, x + diag_width)):
                s = matrix.getValue(xr, seq_cds.asResidue(y))
                if s >= 0:
                    dots.addPair(x, y, float(s))

        if options.loglevel >= 6:
            # Terminate the message with a newline so that subsequent log
            # lines do not get appended to it.
            options.stdlog.write(
                "# finished adding %i dots\n" % dots.getLength())
            options.stdlog.flush()

        alignator_dummy = alignlib_lite.py_makeAlignatorPrebuilt(dots)
        alignator = alignlib_lite.py_makeAlignatorDots(
            alignator_dummy, gop, gep)

    alignator.align(map_p2c, seq_wobble, seq_cds)
def Align(self, method, anchor=0, loglevel=1):
    """Align the pair of sequences ``mSequence1`` and ``mSequence2``.

    TODO: get rid of this and use a method class instead in the future.

    method   -- one of "dialign", "blastz", "dialignlgs", "dba", "nw" or
                "sw"; any other value is treated as a callback and called
                as ``method(s1, s2, map_a2b)``. "clustal" is recognised
                but raises NotImplementedError.
    anchor   -- number of "A" residues added to both ends of both
                sequences before aligning. The padded regions are removed
                from the alignment afterwards and the coordinates are
                shifted back by *anchor*.
    loglevel -- not used by this method.

    Raises NotImplementedError for method "clustal" and AlignmentError if
    the resulting alignment is empty.

    On success the alignment and its derived values are stored on the
    object (``mMethod``, ``mAlignment``, ``mAlignedSequence1/2``,
    ``mAlignment1/2``, ``mAlignmentFrom1/To1/From2/To2``, ``mNumGaps``,
    ``mLength``, ``mAligned``) and SetPercentIdentity() and
    SetBlockSizes() are called. ``strand`` is "+" unless blastz reports a
    reverse complement match, in which case it is "-" and ``mSequence2``
    is replaced by Genomics.complement(mSequence2).
    """
    map_a2b = alignlib_lite.py_makeAlignmentVector()

    padding = "A" * anchor
    s1 = padding + self.mSequence1 + padding
    s2 = padding + self.mSequence2 + padding

    self.strand = "+"

    if method == "dialign":
        dialign = WrapperDialign.Dialign(self.mOptionsDialign)
        dialign.Align(s1, s2, map_a2b)
    elif method == "blastz":
        blastz = WrapperBlastZ.BlastZ(self.mOptionsBlastZ)
        blastz.Align(s1, s2, map_a2b)
        if blastz.isReverseComplement():
            self.strand = "-"
            self.mSequence2 = Genomics.complement(self.mSequence2)
    elif method == "dialignlgs":
        dialignlgs = WrapperDialign.Dialign(self.mOptionsDialignLGS)
        dialignlgs.Align(s1, s2, map_a2b)
    elif method == "dba":
        dba = WrapperDBA.DBA()
        dba.Align(s1, s2, map_a2b)
    elif method == "clustal":
        # The statements that used to follow this raise could never run
        # and have been removed.
        raise NotImplementedError("clustal wrapper needs to be updated")
    elif method == "nw":
        seq1 = alignlib_lite.py_makeSequence(s1)
        seq2 = alignlib_lite.py_makeSequence(s2)
        alignator = alignlib_lite.py_makeAlignatorDPFull(
            alignlib_lite.py_ALIGNMENT_GLOBAL, gop=-12.0, gep=-2.0)
        alignator.align(map_a2b, seq1, seq2)
    elif method == "sw":
        seq1 = alignlib_lite.py_makeSequence(s1)
        seq2 = alignlib_lite.py_makeSequence(s2)
        # NOTE(review): alignator_sw and min_score_sw are not defined in
        # this method - presumably module-level names; confirm.
        alignlib_lite.py_performIterativeAlignment(
            map_a2b, seq1, seq2, alignator_sw, min_score_sw)
    else:
        # use callback function
        method(s1, s2, map_a2b)

    if map_a2b.getLength() == 0:
        raise AlignmentError("empty alignment")

    if anchor:
        # Strip the anchor regions from both ends of rows and columns,
        # then move the alignment back to unpadded coordinates.
        map_a2b.removeRowRegion(
            anchor + len(self.mSequence1) + 1, map_a2b.getRowTo())
        map_a2b.removeRowRegion(1, anchor)
        map_a2b.removeColRegion(
            anchor + len(self.mSequence2) + 1, map_a2b.getColTo())
        map_a2b.removeColRegion(1, anchor)
        map_a2b.moveAlignment(-anchor, -anchor)

    f = alignlib_lite.py_AlignmentFormatExplicit(
        map_a2b,
        alignlib_lite.py_makeSequence(self.mSequence1),
        alignlib_lite.py_makeSequence(self.mSequence2))

    self.mMethod = method
    self.mAlignment = map_a2b
    self.mAlignedSequence1, self.mAlignedSequence2 = \
        f.mRowAlignment, f.mColAlignment

    f = alignlib_lite.py_AlignmentFormatEmissions(map_a2b)
    self.mAlignment1, self.mAlignment2 = f.mRowAlignment, f.mColAlignment

    self.mAlignmentFrom1 = map_a2b.getRowFrom()
    self.mAlignmentTo1 = map_a2b.getRowTo()
    self.mAlignmentFrom2 = map_a2b.getColFrom()
    self.mAlignmentTo2 = map_a2b.getColTo()

    self.mNumGaps, self.mLength = map_a2b.getNumGaps(), map_a2b.getLength()
    self.mAligned = self.mLength - self.mNumGaps

    self.SetPercentIdentity()
    self.SetBlockSizes()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Every positional argument is read as a codeml sites output file. Files
    for which parsing raises WrapperCodeML.UsageError are skipped. For
    each method given with ``--methods`` a report is written to
    ``options.stdout``:

    summary-numbers      -- one row per result with the number of positive
                            sites per model/analysis and the outcome of the
                            log-likelihood ratio test, plus a "cons" row.
    jalview              -- a jalview annotation line per result that has
                            positive sites.
    count-positive-sites -- the number of selected positive sites.
    positive-site-table  -- table of selected sites and their
                            significance, optionally mapped onto the
                            alignment given by ``--filename-map-mali``.

    "positive-site-list" is accepted by the option parser but no branch
    handles it, so it produces no output.

    Raises ValueError for an unknown analysis section or model, or if
    ``--filename-map-mali`` is given without ``--filename-mali``.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: codemls2tsv.py 2781 2009-09-10 11:33:14Z andreas $")

    parser.add_option("--methods", dest="methods", type="choice",
                      action="append",
                      choices=("summary-numbers", "jalview",
                               "positive-site-table", "positive-site-list",
                               "count-positive-sites"),
                      help="methods for analysis.")

    parser.add_option("--selection-mode", dest="selection_mode",
                      type="choice",
                      choices=("all", "consistent", "emes"),
                      help="how to select positive sites.")

    parser.add_option("--prefix", dest="prefix", type="string",
                      help="prefix for rows.")

    parser.add_option("--pattern-input-filenames",
                      dest="pattern_input_filenames", type="string",
                      help="input pattern.")

    parser.add_option("--filter-probability", dest="filter_probability",
                      type="float",
                      help="threshold for probability above which to include positive sites [default=%default].")

    parser.add_option("--filter-omega", dest="filter_omega", type="float",
                      help="threshold for omega above which to include positive sites [default=%default].")

    parser.add_option("--models", dest="models", type="string",
                      help="restrict output to set of site specific models.")

    parser.add_option("--analysis", dest="analysis", type="string",
                      help="restrict output to set of analysis [beb|neb].")

    parser.add_option("--significance-threshold",
                      dest="significance_threshold", type="float",
                      help="significance threshold for log-likelihood test.")

    parser.add_option("--filter-mali", dest="filter_mali", type="choice",
                      choices=("none", "gaps"),
                      help="filter by mali to remove gapped positions.")

    parser.add_option("--filename-mali", dest="filename_mali", type="string",
                      help="filename with multiple alignment used for calculating sites - used for filtering")

    parser.add_option("--filename-map-mali", dest="filename_map_mali",
                      type="string",
                      help="filename with multiple alignment to map sites onto.")

    parser.add_option("--jalview-titles", dest="jalview_titles",
                      type="string",
                      help="comma separated list of jalview annotation titles.")

    parser.add_option("--jalview-symbol", dest="jalview_symbol",
                      type="string",
                      help="symbol to use in jalview.")

    parser.set_defaults(
        methods=[],
        prefix=None,
        filter_probability=0,
        filter_omega=0,
        models="",
        analysis="",
        significance_threshold=0.05,
        selection_mode="consistent",
        filename_mali=None,
        filename_map_mali=None,
        jalview_symbol="*",
        jalview_titles="",
        filter_mali=None,
    )

    # Hand the resolved argument list to the parser so that an explicitly
    # supplied *argv* is honoured, as the docstring promises.
    # NOTE(review): relies on E.Start accepting an ``argv`` keyword -
    # confirm against the E module.
    (options, args) = E.Start(parser, argv=argv)

    if options.jalview_titles:
        options.jalview_titles = options.jalview_titles.split(",")
    else:
        options.jalview_titles = args

    options.models = options.models.split(",")
    options.analysis = options.analysis.split(",")

    # String exceptions are not supported by Python >= 2.6; raise a proper
    # exception so that the message actually reaches the user.
    for a in options.analysis:
        if a not in ("beb", "neb"):
            raise ValueError(
                "unknown analysis section: '%s', possible values are 'beb' and/or 'neb'" % a)

    for a in options.models:
        if a not in ("8", "2", "3"):
            raise ValueError(
                "unknown model: '%s', possible values are 2, 3, 8" % a)

    codeml = WrapperCodeML.CodeMLSites()

    # filter and extract functions
    filter_f = lambda x: x.mProbability >= options.filter_probability and \
        x.mOmega >= options.filter_omega
    extract_f = lambda x: x.mResidue

    # read multiple results
    results = []
    ninput, noutput, nskipped = 0, 0, 0

    # names of the input files that were parsed successfully, parallel to
    # ``results``
    headers = []
    for filename in args:
        ninput += 1
        # read eagerly so that the file handle is closed before parsing
        with open(filename, "r") as infile:
            lines = infile.readlines()
        try:
            results.append(codeml.parseOutput(lines))
        except WrapperCodeML.UsageError:
            if options.loglevel >= 1:
                options.stdlog.write("# no input from %s\n" % filename)
            nskipped += 1
            continue
        noutput += 1
        headers.append(filename)

    # map of nested model (key) to more general model
    map_nested_models = {'8': '7',
                         '2': '1',
                         '3': '0'}

    if options.filename_mali:
        mali = Mali.Mali()
        mali.readFromFile(open(options.filename_mali, "r"))
    else:
        mali = None

    ###############################################################
    # use multiple alignment to map residues to a reference mali
    # or a sequence.
    ###############################################################
    if options.filename_map_mali:

        if not mali:
            raise ValueError(
                "please supply the input multiple alignment, if residues are to be mapped.")

        # translate the alignments
        def translate(s):
            sequence = s.mString
            seq = []
            for codon in [sequence[x:x + 3]
                          for x in range(0, len(sequence), 3)]:
                aa = Genomics.MapCodon2AA(codon)
                seq.append(aa)

            s.mString = "".join(seq)

        tmali = Mali.Mali()
        tmali.readFromFile(open(options.filename_mali, "r"))
        tmali.apply(translate)

        tmap_mali = Mali.Mali()
        tmap_mali.readFromFile(open(options.filename_map_mali, "r"))

        if tmap_mali.getAlphabet() == "na":
            tmap_mali.apply(translate)

        map_old2new = alignlib_lite.py_makeAlignmentVector()

        mali1 = alignlib_lite.py_makeProfileFromMali(convertMali2Mali(tmali))

        if tmap_mali.getLength() == 1:

            s = tmap_mali.values()[0].mString
            mali2 = alignlib_lite.py_makeSequence(s)
            # see if you can find an identical subsequence and then align
            # to this
            for x in tmali.values():
                if s in re.sub("[- .]+", "", x.mString):
                    mali1 = alignlib_lite.py_makeSequence(x.mString)
                    break
        else:
            mali2 = alignlib_lite.py_makeProfileFromMali(
                convertMali2Mali(tmap_mali))

        alignator = alignlib_lite.py_makeAlignatorDPFull(
            alignlib_lite.py_ALIGNMENT_LOCAL, -10.0, -2.0)
        alignator.align(map_old2new, mali1, mali2)

        consensus = tmap_mali.getConsensus()

        if options.loglevel >= 4:
            options.stdlog.write("# alphabet: %s\n" % tmap_mali.getAlphabet())
            options.stdlog.write("# orig : %s\n" % tmali.getConsensus())
            options.stdlog.write("# mapped: %s\n" % consensus)
            options.stdlog.write("# alignment: %s\n" % map_old2new.Write())
    else:
        map_old2new = None

    for method in options.methods:

        if method == "summary-numbers":

            options.stdlog.write(
                """# Numbers of positive sites.
#
# The consistent row/column contains positive sites that are significant
# (above thresholds for probability and omega) for all models/analysis
# that have been selected (label: cons).
#
# The log-likelihood ratio test is performed for model pairs, depending
# on the output chosen.
# Significance threshold: %6.4f
# The pairs are 8 versus 7 and 2 versus 1 and 3 versus 0.
#
""" % options.significance_threshold)

            # write header
            if options.prefix:
                options.stdout.write("prefix\t")

            options.stdout.write("method\tnseq\t")
            h = []
            for model in options.models:
                for analysis in options.analysis:
                    h.append("%s%s" % (analysis, model))
                h.append("p%s" % (model))
                h.append("df%s" % (model))
                h.append("chi%s" % (model))
                h.append("lrt%s" % (model))

            options.stdout.write("\t".join(h))
            options.stdout.write("\tcons\tpassed\tfilename\n")

            nmethod = 0

            # one set per analysis: sites positive in every result/model
            consistent_cols = [None for x in range(len(options.analysis))]
            passed_tests = {}
            for m in options.models:
                passed_tests[m] = 0

            for result in results:

                row_consistent = None

                if options.prefix:
                    # The header and the "cons" row separate the prefix
                    # with a tab; do the same here so columns line up.
                    options.stdout.write("%s\t" % (options.prefix))

                options.stdout.write("%i" % nmethod)
                options.stdout.write("\t%i" % (result.mNumSequences))

                npassed = 0

                for model in options.models:

                    sites = result.mSites[model]

                    # do significance test
                    full_model, null_model = model, map_nested_models[model]

                    lrt = Stats.doLogLikelihoodTest(
                        result.mSites[full_model].mLogLikelihood,
                        result.mSites[full_model].mNumParameters,
                        result.mSites[null_model].mLogLikelihood,
                        result.mSites[null_model].mNumParameters,
                        options.significance_threshold)

                    for x, analysis in enumerate(options.analysis):

                        if analysis == "neb":
                            s = set(map(extract_f,
                                        filter(filter_f,
                                               sites.mNEB.mPositiveSites)))
                        elif analysis == "beb":
                            s = set(map(extract_f,
                                        filter(filter_f,
                                               sites.mBEB.mPositiveSites)))

                        options.stdout.write("\t%i" % (len(s)))

                        # sites of a model that failed the test do not
                        # count towards the consistent sets
                        if not lrt.mPassed:
                            s = set()

                        if row_consistent is None:
                            row_consistent = s
                        else:
                            row_consistent = row_consistent.intersection(s)

                        if consistent_cols[x] is None:
                            consistent_cols[x] = s
                        else:
                            consistent_cols[x] = \
                                consistent_cols[x].intersection(s)

                    if lrt.mPassed:
                        c = "passed"
                        passed_tests[model] += 1
                        npassed += 1
                    else:
                        c = "failed"

                    options.stdout.write("\t%5.2e\t%i\t%5.2f\t%s" %
                                         (lrt.mProbability,
                                          lrt.mDegreesFreedom,
                                          lrt.mChiSquaredValue,
                                          c))

                options.stdout.write(
                    "\t%i\t%i\t%s\n" % (len(row_consistent),
                                        npassed,
                                        headers[nmethod]))

                nmethod += 1

            if options.prefix:
                options.stdout.write("%s\t" % options.prefix)

            options.stdout.write("cons")

            row_consistent = None
            total_passed = 0
            for model in options.models:

                for x, analysis in enumerate(options.analysis):

                    s = consistent_cols[x]
                    if s is None:
                        s = set()

                    options.stdout.write("\t%i" % (len(s)))

                    if row_consistent is None:
                        row_consistent = s
                    else:
                        row_consistent = row_consistent.intersection(s)

                options.stdout.write("\tna\t%i" % passed_tests[model])
                total_passed += passed_tests[model]

            options.stdout.write(
                "\t%i\t%i\n" % (len(row_consistent), total_passed))

        elif method == "jalview":

            options.stdout.write("JALVIEW_ANNOTATION\n")
            options.stdout.write("# Created: %s\n\n" %
                                 (time.asctime(time.localtime(time.time()))))

            # index of the next title; advances only for results that
            # have positive sites
            x = 0
            for result in results:

                sites, significance = selectPositiveSites(
                    [result], options.selection_mode, options, mali)

                codes = [""] * result.mLength

                if len(sites) == 0:
                    continue

                for site in sites:
                    codes[site - 1] = options.jalview_symbol

                options.stdout.write(
                    "NO_GRAPH\t%s\t%s\n" % (options.jalview_titles[x],
                                            "|".join(codes)))
                x += 1

        elif method == "count-positive-sites":

            sites, significance = selectPositiveSites(
                results, options.selection_mode, options, mali)

            options.stdout.write("%i\n" % (len(sites)))

        elif method in ("positive-site-table", ):

            sites, significance = selectPositiveSites(
                results, options.selection_mode, options, mali)

            # Use a separate name: ``headers`` holds the input filenames
            # that "summary-numbers" needs and must not be overwritten.
            table_headers = ["site", "P"]
            if map_old2new:
                table_headers.append("mapped")
                table_headers.append("Pm")

            options.stdout.write("\t".join(table_headers) + "\n")

            sites = list(sites)
            sites.sort()
            nmapped, nunmapped = 0, 0
            for site in sites:
                values = [site, "%6.4f" % significance[site]]

                if map_old2new:
                    r = map_old2new.mapRowToCol(site)
                    if r == 0:
                        values.append("na")
                        values.append("")
                        nunmapped += 1
                        if options.loglevel >= 2:
                            options.stdlog.write(
                                "# unmapped residue: %i\n" % site)
                    else:
                        values.append(r)
                        values.append(consensus[r - 1])
                        nmapped += 1

                options.stdout.write("\t".join(map(str, (values))) + "\n")

            if options.loglevel >= 1:
                options.stdlog.write(
                    "# sites: ninput=%i, noutput=%i, nskipped=%i\n" %
                    (len(sites), nmapped, nunmapped))

    E.info("ninput=%i, noutput=%i, nskipped=%i" % (ninput, noutput, nskipped))

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Every positional argument is read as a codeml sites output file. Files
    for which parsing raises WrapperCodeML.UsageError are skipped. For
    each method given with ``--methods`` a report is written to
    ``options.stdout``:

    summary-numbers      -- one row per result with the number of positive
                            sites per model/analysis and the outcome of the
                            log-likelihood ratio test, plus a "cons" row.
    jalview              -- a jalview annotation line per result that has
                            positive sites.
    count-positive-sites -- the number of selected positive sites.
    positive-site-table  -- table of selected sites and their
                            significance, optionally mapped onto the
                            alignment given by ``--filename-map-mali``.

    "positive-site-list" is accepted by the option parser but no branch
    handles it, so it produces no output.

    Raises ValueError for an unknown analysis section or model, or if
    ``--filename-map-mali`` is given without ``--filename-mali``.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: codemls2tsv.py 2781 2009-09-10 11:33:14Z andreas $")

    parser.add_option("--methods", dest="methods", type="choice",
                      action="append",
                      choices=("summary-numbers", "jalview",
                               "positive-site-table", "positive-site-list",
                               "count-positive-sites"),
                      help="methods for analysis.")

    parser.add_option("--selection-mode", dest="selection_mode",
                      type="choice",
                      choices=("all", "consistent", "emes"),
                      help="how to select positive sites.")

    parser.add_option("--prefix", dest="prefix", type="string",
                      help="prefix for rows.")

    parser.add_option("--pattern-input-filenames",
                      dest="pattern_input_filenames", type="string",
                      help="input pattern.")

    parser.add_option("--filter-probability", dest="filter_probability",
                      type="float",
                      help="threshold for probability above which to include positive sites [default=%default].")

    parser.add_option("--filter-omega", dest="filter_omega", type="float",
                      help="threshold for omega above which to include positive sites [default=%default].")

    parser.add_option("--models", dest="models", type="string",
                      help="restrict output to set of site specific models.")

    parser.add_option("--analysis", dest="analysis", type="string",
                      help="restrict output to set of analysis [beb|neb].")

    parser.add_option("--significance-threshold",
                      dest="significance_threshold", type="float",
                      help="significance threshold for log-likelihood test.")

    parser.add_option("--filter-mali", dest="filter_mali", type="choice",
                      choices=("none", "gaps"),
                      help="filter by mali to remove gapped positions.")

    parser.add_option("--filename-mali", dest="filename_mali", type="string",
                      help="filename with multiple alignment used for calculating sites - used for filtering")

    parser.add_option("--filename-map-mali", dest="filename_map_mali",
                      type="string",
                      help="filename with multiple alignment to map sites onto.")

    parser.add_option("--jalview-titles", dest="jalview_titles",
                      type="string",
                      help="comma separated list of jalview annotation titles.")

    parser.add_option("--jalview-symbol", dest="jalview_symbol",
                      type="string",
                      help="symbol to use in jalview.")

    parser.set_defaults(
        methods=[],
        prefix=None,
        filter_probability=0,
        filter_omega=0,
        models="",
        analysis="",
        significance_threshold=0.05,
        selection_mode="consistent",
        filename_mali=None,
        filename_map_mali=None,
        jalview_symbol="*",
        jalview_titles="",
        filter_mali=None,
    )

    # Hand the resolved argument list to the parser so that an explicitly
    # supplied *argv* is honoured, as the docstring promises.
    # NOTE(review): relies on E.Start accepting an ``argv`` keyword -
    # confirm against the E module.
    (options, args) = E.Start(parser, argv=argv)

    if options.jalview_titles:
        options.jalview_titles = options.jalview_titles.split(",")
    else:
        options.jalview_titles = args

    options.models = options.models.split(",")
    options.analysis = options.analysis.split(",")

    # String exceptions are not supported by Python >= 2.6; raise a proper
    # exception so that the message actually reaches the user.
    for a in options.analysis:
        if a not in ("beb", "neb"):
            raise ValueError(
                "unknown analysis section: '%s', possible values are 'beb' and/or 'neb'" % a)

    for a in options.models:
        if a not in ("8", "2", "3"):
            raise ValueError(
                "unknown model: '%s', possible values are 2, 3, 8" % a)

    codeml = WrapperCodeML.CodeMLSites()

    # filter and extract functions
    filter_f = lambda x: x.mProbability >= options.filter_probability and \
        x.mOmega >= options.filter_omega
    extract_f = lambda x: x.mResidue

    # read multiple results
    results = []
    ninput, noutput, nskipped = 0, 0, 0

    # names of the input files that were parsed successfully, parallel to
    # ``results``
    headers = []
    for filename in args:
        ninput += 1
        # read eagerly so that the file handle is closed before parsing
        with open(filename, "r") as infile:
            lines = infile.readlines()
        try:
            results.append(codeml.parseOutput(lines))
        except WrapperCodeML.UsageError:
            if options.loglevel >= 1:
                options.stdlog.write("# no input from %s\n" % filename)
            nskipped += 1
            continue
        noutput += 1
        headers.append(filename)

    # map of nested model (key) to more general model
    map_nested_models = {'8': '7',
                         '2': '1',
                         '3': '0'}

    if options.filename_mali:
        mali = Mali.Mali()
        mali.readFromFile(open(options.filename_mali, "r"))
    else:
        mali = None

    ###############################################################
    # use multiple alignment to map residues to a reference mali
    # or a sequence.
    ###############################################################
    if options.filename_map_mali:

        if not mali:
            raise ValueError(
                "please supply the input multiple alignment, if residues are to be mapped.")

        # translate the alignments
        def translate(s):
            sequence = s.mString
            seq = []
            for codon in [sequence[x:x + 3]
                          for x in range(0, len(sequence), 3)]:
                aa = Genomics.MapCodon2AA(codon)
                seq.append(aa)

            s.mString = "".join(seq)

        tmali = Mali.Mali()
        tmali.readFromFile(open(options.filename_mali, "r"))
        tmali.apply(translate)

        tmap_mali = Mali.Mali()
        tmap_mali.readFromFile(open(options.filename_map_mali, "r"))

        if tmap_mali.getAlphabet() == "na":
            tmap_mali.apply(translate)

        map_old2new = alignlib_lite.py_makeAlignmentVector()

        mali1 = alignlib_lite.py_makeProfileFromMali(convertMali2Mali(tmali))

        if tmap_mali.getLength() == 1:

            s = tmap_mali.values()[0].mString
            mali2 = alignlib_lite.py_makeSequence(s)
            # see if you can find an identical subsequence and then align
            # to this
            for x in tmali.values():
                if s in re.sub("[- .]+", "", x.mString):
                    mali1 = alignlib_lite.py_makeSequence(x.mString)
                    break
        else:
            mali2 = alignlib_lite.py_makeProfileFromMali(
                convertMali2Mali(tmap_mali))

        alignator = alignlib_lite.py_makeAlignatorDPFull(
            alignlib_lite.py_ALIGNMENT_LOCAL, -10.0, -2.0)
        alignator.align(map_old2new, mali1, mali2)

        consensus = tmap_mali.getConsensus()

        if options.loglevel >= 4:
            options.stdlog.write("# alphabet: %s\n" % tmap_mali.getAlphabet())
            options.stdlog.write("# orig : %s\n" % tmali.getConsensus())
            options.stdlog.write("# mapped: %s\n" % consensus)
            options.stdlog.write("# alignment: %s\n" % map_old2new.Write())
    else:
        map_old2new = None

    for method in options.methods:

        if method == "summary-numbers":

            options.stdlog.write(
                """# Numbers of positive sites.
#
# The consistent row/column contains positive sites that are significant
# (above thresholds for probability and omega) for all models/analysis
# that have been selected (label: cons).
#
# The log-likelihood ratio test is performed for model pairs, depending
# on the output chosen.
# Significance threshold: %6.4f
# The pairs are 8 versus 7 and 2 versus 1 and 3 versus 0.
#
""" % options.significance_threshold)

            # write header
            if options.prefix:
                options.stdout.write("prefix\t")

            options.stdout.write("method\tnseq\t")
            h = []
            for model in options.models:
                for analysis in options.analysis:
                    h.append("%s%s" % (analysis, model))
                h.append("p%s" % (model))
                h.append("df%s" % (model))
                h.append("chi%s" % (model))
                h.append("lrt%s" % (model))

            options.stdout.write("\t".join(h))
            options.stdout.write("\tcons\tpassed\tfilename\n")

            nmethod = 0

            # one set per analysis: sites positive in every result/model
            consistent_cols = [None for x in range(len(options.analysis))]
            passed_tests = {}
            for m in options.models:
                passed_tests[m] = 0

            for result in results:

                row_consistent = None

                if options.prefix:
                    # The header and the "cons" row separate the prefix
                    # with a tab; do the same here so columns line up.
                    options.stdout.write("%s\t" % (options.prefix))

                options.stdout.write("%i" % nmethod)
                options.stdout.write("\t%i" % (result.mNumSequences))

                npassed = 0

                for model in options.models:

                    sites = result.mSites[model]

                    # do significance test
                    full_model, null_model = model, map_nested_models[model]

                    lrt = Stats.doLogLikelihoodTest(
                        result.mSites[full_model].mLogLikelihood,
                        result.mSites[full_model].mNumParameters,
                        result.mSites[null_model].mLogLikelihood,
                        result.mSites[null_model].mNumParameters,
                        options.significance_threshold)

                    for x, analysis in enumerate(options.analysis):

                        if analysis == "neb":
                            s = set(map(extract_f,
                                        filter(filter_f,
                                               sites.mNEB.mPositiveSites)))
                        elif analysis == "beb":
                            s = set(map(extract_f,
                                        filter(filter_f,
                                               sites.mBEB.mPositiveSites)))

                        options.stdout.write("\t%i" % (len(s)))

                        # sites of a model that failed the test do not
                        # count towards the consistent sets
                        if not lrt.mPassed:
                            s = set()

                        if row_consistent is None:
                            row_consistent = s
                        else:
                            row_consistent = row_consistent.intersection(s)

                        if consistent_cols[x] is None:
                            consistent_cols[x] = s
                        else:
                            consistent_cols[x] = \
                                consistent_cols[x].intersection(s)

                    if lrt.mPassed:
                        c = "passed"
                        passed_tests[model] += 1
                        npassed += 1
                    else:
                        c = "failed"

                    options.stdout.write("\t%5.2e\t%i\t%5.2f\t%s" %
                                         (lrt.mProbability,
                                          lrt.mDegreesFreedom,
                                          lrt.mChiSquaredValue,
                                          c))

                options.stdout.write(
                    "\t%i\t%i\t%s\n" % (len(row_consistent),
                                        npassed,
                                        headers[nmethod]))

                nmethod += 1

            if options.prefix:
                options.stdout.write("%s\t" % options.prefix)

            options.stdout.write("cons")

            row_consistent = None
            total_passed = 0
            for model in options.models:

                for x, analysis in enumerate(options.analysis):

                    s = consistent_cols[x]
                    if s is None:
                        s = set()

                    options.stdout.write("\t%i" % (len(s)))

                    if row_consistent is None:
                        row_consistent = s
                    else:
                        row_consistent = row_consistent.intersection(s)

                options.stdout.write("\tna\t%i" % passed_tests[model])
                total_passed += passed_tests[model]

            options.stdout.write(
                "\t%i\t%i\n" % (len(row_consistent), total_passed))

        elif method == "jalview":

            options.stdout.write("JALVIEW_ANNOTATION\n")
            options.stdout.write("# Created: %s\n\n" %
                                 (time.asctime(time.localtime(time.time()))))

            # index of the next title; advances only for results that
            # have positive sites
            x = 0
            for result in results:

                sites, significance = selectPositiveSites(
                    [result], options.selection_mode, options, mali)

                codes = [""] * result.mLength

                if len(sites) == 0:
                    continue

                for site in sites:
                    codes[site - 1] = options.jalview_symbol

                options.stdout.write(
                    "NO_GRAPH\t%s\t%s\n" % (options.jalview_titles[x],
                                            "|".join(codes)))
                x += 1

        elif method == "count-positive-sites":

            sites, significance = selectPositiveSites(
                results, options.selection_mode, options, mali)

            options.stdout.write("%i\n" % (len(sites)))

        elif method in ("positive-site-table", ):

            sites, significance = selectPositiveSites(
                results, options.selection_mode, options, mali)

            # Use a separate name: ``headers`` holds the input filenames
            # that "summary-numbers" needs and must not be overwritten.
            table_headers = ["site", "P"]
            if map_old2new:
                table_headers.append("mapped")
                table_headers.append("Pm")

            options.stdout.write("\t".join(table_headers) + "\n")

            sites = list(sites)
            sites.sort()
            nmapped, nunmapped = 0, 0
            for site in sites:
                values = [site, "%6.4f" % significance[site]]

                if map_old2new:
                    r = map_old2new.mapRowToCol(site)
                    if r == 0:
                        values.append("na")
                        values.append("")
                        nunmapped += 1
                        if options.loglevel >= 2:
                            options.stdlog.write(
                                "# unmapped residue: %i\n" % site)
                    else:
                        values.append(r)
                        values.append(consensus[r - 1])
                        nmapped += 1

                options.stdout.write("\t".join(map(str, (values))) + "\n")

            if options.loglevel >= 1:
                options.stdlog.write(
                    "# sites: ninput=%i, noutput=%i, nskipped=%i\n" %
                    (len(sites), nmapped, nunmapped))

    E.info("ninput=%i, noutput=%i, nskipped=%i" % (ninput, noutput, nskipped))

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Expects exactly two positional arguments, each the name of a file
    with a multiple alignment. Both alignments are converted to profiles,
    the profiles are aligned against each other and the combined multiple
    alignment is written to ``options.stdout``.

    Raises ValueError unless exactly two filenames are supplied.
    """
    argv = argv or sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    # (flags, keyword arguments) in the order in which they are registered
    option_table = (
        (("-o", "--gop"),
         dict(dest="gop", type="float",
              help="gap opening penalty [default=%default].")),
        (("-e", "--gep"),
         dict(dest="gep", type="float",
              help="gap extension penalty [default=%default].")),
        (("-m", "--mode"),
         dict(dest="mode", type="choice",
              choices=("global", "local"),
              help="alignment mode, global=nw, local=sw [default=%default].")),
    )
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    parser.set_defaults(
        gop=-12.0,
        gep=-2.0,
        format="fasta",
        mode="local",
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError(
            "please supply two multiple alignments in FASTA format.")

    malis = [Mali.Mali(), Mali.Mali()]

    E.info("read 2 multiple alignments")
    for mali, filename in zip(malis, args):
        mali.readFromFile(IOTools.openFile(filename, "r"),
                          format=options.format)

    cmalis = [Mali.convertMali2Alignlib(mali) for mali in malis]

    if options.mode == "local":
        mode = alignlib_lite.py_ALIGNMENT_LOCAL
    elif options.mode == "global":
        mode = alignlib_lite.py_ALIGNMENT_GLOBAL

    alignator = alignlib_lite.py_makeAlignatorDPFull(
        mode, options.gop, options.gep)

    # library-wide defaults are set before the profiles are built
    alignlib_lite.py_setDefaultEncoder(
        alignlib_lite.py_getEncoder(alignlib_lite.py_Protein20))
    alignlib_lite.py_setDefaultLogOddor(
        alignlib_lite.py_makeLogOddorDirichlet(0.3))
    alignlib_lite.py_setDefaultRegularizor(
        alignlib_lite.py_makeRegularizorDirichletPrecomputed())

    profiles = [alignlib_lite.py_makeProfile(cmali) for cmali in cmalis]

    result = alignlib_lite.py_makeAlignmentVector()

    alignator.align(result, profiles[0], profiles[1])

    E.debug("result=\n%s" % alignlib_lite.py_AlignmentFormatEmissions(result))

    # merge the second alignment into the first along the profile alignment
    combined = cmalis[0]
    combined.add(cmalis[1], result)

    identifiers = malis[0].getIdentifiers() + malis[1].getIdentifiers()
    outmali = Mali.convertAlignlib2Mali(combined, identifiers=identifiers)

    outmali.writeToFile(options.stdout, format=options.format)

    # write footer and output benchmark information.
    E.Stop()
optlist, args = getopt.getopt(sys.argv[1:], param_short_options, param_long_options) except getopt.error, msg: print globals()["__doc__"], msg sys.exit(2) for o, a in optlist: if o in ("-v", "--verbose"): param_loglevel = int(a) elif o in ("--version",): print "version=" sys.exit(0) elif o in ("-h", "--help"): print globals()["__doc__"] sys.exit(0) alignator = alignlib_lite.py_makeAlignatorDPFull(alignlib_lite.py_ALIGNMENT_LOCAL, param_gop, param_gep) map_query2token = alignlib_lite.py_makeAlignmentVector() for line in sys.stdin: if line[0] == "#": continue query_token, sbjct_token, query_sequence, sbjct_sequence = string.split(line[:-1], "\t") map_query2token.clear() row = alignlib_lite.py_makeSequence(query_sequence) col = alignlib_lite.py_makeSequence(sbjct_sequence) alignator.align(map_query2token, row, col) pidentity = 100.0 * alignlib_lite.py_calculatePercentIdentity(map_query2token, row, col) psimilarity = 100.0 * alignlib_lite.py_calculatePercentSimilarity(map_query2token)
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Expects exactly two positional arguments, each the name of a file
    with a multiple alignment. Both alignments are converted to profiles,
    the profiles are aligned against each other and the combined multiple
    alignment is written to ``options.stdout``.

    Raises ValueError unless exactly two filenames are supplied.
    """
    argv = argv or sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    # (flags, keyword arguments) in the order in which they are registered
    option_table = (
        (("-o", "--gop"),
         dict(dest="gop", type="float",
              help="gap opening penalty [default=%default].")),
        (("-e", "--gep"),
         dict(dest="gep", type="float",
              help="gap extension penalty [default=%default].")),
        (("-m", "--mode"),
         dict(dest="mode", type="choice",
              choices=("global", "local"),
              help="alignment mode, global=nw, local=sw [default=%default].")),
    )
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    parser.set_defaults(
        gop=-12.0,
        gep=-2.0,
        format="fasta",
        mode="local",
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError(
            "please supply two multiple alignments in FASTA format.")

    malis = [Mali.Mali(), Mali.Mali()]

    E.info("read 2 multiple alignments")
    for mali, filename in zip(malis, args):
        mali.readFromFile(IOTools.openFile(filename, "r"),
                          format=options.format)

    cmalis = [Mali.convertMali2Alignlib(mali) for mali in malis]

    if options.mode == "local":
        mode = alignlib_lite.py_ALIGNMENT_LOCAL
    elif options.mode == "global":
        mode = alignlib_lite.py_ALIGNMENT_GLOBAL

    alignator = alignlib_lite.py_makeAlignatorDPFull(
        mode, options.gop, options.gep)

    # library-wide defaults are set before the profiles are built
    alignlib_lite.py_setDefaultEncoder(
        alignlib_lite.py_getEncoder(alignlib_lite.py_Protein20))
    alignlib_lite.py_setDefaultLogOddor(
        alignlib_lite.py_makeLogOddorDirichlet(0.3))
    alignlib_lite.py_setDefaultRegularizor(
        alignlib_lite.py_makeRegularizorDirichletPrecomputed())

    profiles = [alignlib_lite.py_makeProfile(cmali) for cmali in cmalis]

    result = alignlib_lite.py_makeAlignmentVector()

    alignator.align(result, profiles[0], profiles[1])

    E.debug("result=\n%s" % alignlib_lite.py_AlignmentFormatEmissions(result))

    # merge the second alignment into the first along the profile alignment
    combined = cmalis[0]
    combined.add(cmalis[1], result)

    identifiers = malis[0].getIdentifiers() + malis[1].getIdentifiers()
    outmali = Mali.convertAlignlib2Mali(combined, identifiers=identifiers)

    outmali.writeToFile(options.stdout, format=options.format)

    # write footer and output benchmark information.
    E.Stop()
param_long_options) except getopt.error, msg: print globals()["__doc__"], msg sys.exit(2) for o, a in optlist: if o in ("-v", "--verbose"): param_loglevel = int(a) elif o in ("--version", ): print "version=" sys.exit(0) elif o in ("-h", "--help"): print globals()["__doc__"] sys.exit(0) alignator = alignlib_lite.py_makeAlignatorDPFull( alignlib_lite.py_ALIGNMENT_LOCAL, param_gop, param_gep) map_query2token = alignlib_lite.py_makeAlignmentVector() for line in sys.stdin: if line[0] == "#": continue query_token, sbjct_token, query_sequence, sbjct_sequence = string.split( line[:-1], "\t") map_query2token.clear() row = alignlib_lite.py_makeSequence(query_sequence) col = alignlib_lite.py_makeSequence(sbjct_sequence) alignator.align(map_query2token, row, col) pidentity = 100.0 * \