def run(codon_model, data, xgram, options):
    """Run xgram on data.

    Trains two variants of *codon_model* on *data*: one with all rates
    free and one with omega fixed.  Progress and timing information are
    written to ``options.stdlog`` depending on ``options.loglevel``.

    Returns the tuple ``(trained_fixed, trained_variable)``.
    """
    t0 = time.time()
    if options.loglevel >= 1:
        options.stdlog.write(
            "# executing dart/codeml with codon model %s\n" % codon_model)

    # -- variable (free omega) model -------------------------------------
    model_variable = Codons.buildCodonML(
        codon_model=codon_model,
        fix_frequencies=options.fix_frequencies)

    if options.insert_frequencies:
        setFrequencies(model_variable, data)
    if options.fix_rates:
        setFixedRates(model_variable, options.fix_rates)

    writeModel(model_variable, "input_variable", options)

    t1 = time.time()
    trained_variable = xgram.train(model_variable, data)
    writeModel(trained_variable.getModel(), "trained_variable", options)
    t2 = time.time()

    # -- fixed omega model -----------------------------------------------
    model_fixed = Codons.buildCodonML(
        codon_model=codon_model,
        fix_frequencies=options.fix_frequencies,
        fix_omega=True)

    if options.insert_frequencies:
        setFrequencies(model_fixed, data)

    writeModel(model_fixed, "input_fixed", options)

    t3 = time.time()
    trained_fixed = xgram.train(model_fixed, data)
    writeModel(trained_fixed.getModel(), "trained_fixed", options)
    t4 = time.time()

    if options.loglevel >= 1:
        options.stdlog.write('# dart/codeml finished in %i seconds.\n' % (t4 - t0))
    if options.loglevel >= 2:
        options.stdlog.write(
            "# execution times\t%i\t%i\t%i\t%i\t%i\n" %
            (t4 - t0, t1 - t0, t2 - t1, t3 - t2, t4 - t3))

    return trained_fixed, trained_variable
def setFrequencies(model, mali, prefix=""):
    """set frequencies in a model according to those observed in data.

    prefix: prefix for rate parameters. Frequencies are labelled:
    pa0, pc0, ..., pa1, pc1, ..., pa2, pc2, ...
    """
    try:
        frequencies = Codons.getFrequenciesPerCodonPosition(
            [x.mString for x in mali.values()])
    except XGram.Exceptions.UsageError:
        # frequencies could not be computed for this alignment - leave
        # the model untouched
        return

    # build a dummy grammar to insert frequencies
    dummy_grammar = XGram.Model.Grammar()
    for position in range(3):
        variables = [
            ("%sp%s%i" % (prefix, nucleotide.lower(), position),
             frequencies[position][nucleotide])
            for nucleotide in ('A', 'C', 'G', 'T')]
        dummy_grammar.addVariable(variables)

    model.mGrammar.copyParameters(dummy_grammar, ignore_missing=True)
def setFrequencies(model, mali, prefix=""):
    """set frequencies in a model according to those observed in data.

    prefix: prefix for rate parameters. Frequencies are labelled:
    pa0, pc0, ..., pa1, pc1, ..., pa2, pc2, ...
    """
    try:
        frequencies = Codons.getFrequenciesPerCodonPosition(
            [x.mString for x in list(mali.values())])
    except XGram.Exceptions.UsageError:
        # frequencies could not be computed for this alignment - leave
        # the model untouched
        return

    # build a dummy grammar to insert frequencies
    dummy_grammar = XGram.Model.Grammar()
    for position in range(3):
        variables = [
            ("%sp%s%i" % (prefix, nucleotide.lower(), position),
             frequencies[position][nucleotide])
            for nucleotide in ('A', 'C', 'G', 'T')]
        dummy_grammar.addVariable(variables)

    model.mGrammar.copyParameters(dummy_grammar, ignore_missing=True)
def setUp(self):
    """set up data used in the tests.

    setUp is called before each test function execution.
    """
    # two-block linear grammar with per-block frequencies and rates
    self.mModel = Codons.buildCodonML(
        "codons-four",
        num_blocks=2,
        grammar_type="linear-blocks",
        shared_frequencies=False,
        shared_rates=False)
def setUp(self):
    """set up data used in the tests.

    setUp is called before each test function execution.
    """
    # two-block linear grammar with per-block frequencies and rates
    model_options = dict(num_blocks=2,
                         grammar_type="linear-blocks",
                         shared_frequencies=False,
                         shared_rates=False)
    self.mModel = Codons.buildCodonML("codons-four", **model_options)
def setFrequencies(model, data):
    """set frequencies in a model according to those observed in data."""
    sequences = getSequencesFromStk(data)
    frequencies = Codons.getFrequenciesPerCodonPosition(sequences.values())

    # build a dummy grammar to insert frequencies
    dummy_grammar = XGram.Model.Grammar()
    for position in range(3):
        variables = [
            ("P%i%s" % (position, nucleotide), frequencies[position][nucleotide])
            for nucleotide in ('A', 'C', 'G', 'T')]
        dummy_grammar.addVariable(variables)

    model.mGrammar.copyParameters(dummy_grammar, ignore_missing=True)
def prepareGrammar(xgram, mali, tree, map_old2new, blocks, options):
    """prepare grammar for custom grammars.

    Builds an annotated multi-block codon grammar, writes the (clipped)
    alignment plus tree to a temporary stockholm file and trains the
    grammar on it with xgram.

    Returns the tuple ``(result, mali, ids)``.
    """
    # list() so labels supports len() and indexing - a Python 3 map
    # object supports neither
    labels = [x[1] for x in blocks]
    nblocks = len(blocks)

    # per-block annotations mapping terminal names to block labels
    annotate_terminals = {}
    for x in range(len(labels)):
        annotations = []
        key = []
        for c in range(0, 3):
            t = "B%i_COD%i" % (x, c)
            key.append(t)
            annotations.append(
                Annotation(row="STATE", column=t, label=labels[x]))
        annotate_terminals[tuple(key)] = annotations

    input_model = Codons.buildCodonML(
        codon_model="f3x4-fourproducts",
        num_blocks=nblocks,
        grammar_type="linear-blocks",
        annotate_terminals=annotate_terminals,
        shared_frequencies=options.shared_frequencies,
        shared_rates=False,
    )

    # manually share rates between blocks: rename the per-block rate
    # parameters to one shared name
    shared_rate_sets = {
        "kappa": ("Ri", "Rv"),
        "kappa-ds": ("Ri", "Rv", "Rs"),
        "omega": ("Rs", "Rn"),
        "omega-ds": ("Rv", "Rs", "Rn"),
        "ds": ("Rs",),
        "all": ("Rv", "Rs", "Rn", "Ri"),
    }
    if options.shared_rates in shared_rate_sets:
        for c in range(0, nblocks):
            for rate in shared_rate_sets[options.shared_rates]:
                input_model.renameParameter("B%i_%s" % (c, rate), rate)

    writeModel(input_model, "input", options)

    ids = mali.getIdentifiers()

    # NOTE(review): the temporary file is never removed - confirm whether
    # callers rely on it before adding cleanup.
    fh, filename = tempfile.mkstemp()
    os.close(fh)
    outfile = open(filename, "w")

    # clip mali by supplied blocks
    mali.clipByAnnotation("STATE", "".join(labels))

    if tree:
        tree.rescaleBranchLengths(1.0)
        tree_options = "#=GF NH %s" % tree.to_string(
            branchlengths_only=True, format="nh")
    elif mali.getNumSequences() == 2:
        tree_options = "#=GF NH (%s:1.0)%s;" % tuple(map_old2new.values())
    else:
        # was `raise "Please supply a tree."` - raising a plain string is
        # a TypeError in modern Python (string exceptions were removed)
        raise ValueError("Please supply a tree.")

    mali.writeToFile(outfile,
                     format="stockholm",
                     write_ranges=False,
                     options=(tree_options, ))
    outfile.close()

    # prefix, code
    if options.shared_frequencies:
        frequency_codes = (("", ""), )
    else:
        frequency_codes = blocks

    if options.insert_frequencies:
        # estimate frequencies per block from the block's sub-alignment
        for prefix, code in frequency_codes:
            temp_mali = mali.getClone()
            temp_mali.clipByAnnotation("STATE", code)
            RateEstimation.setFrequencies(input_model, temp_mali, prefix)

    if options.fix_frequencies:
        # turn frequency parameters into constants so they are not trained
        for prefix, code in frequency_codes:
            for char in ('a', 'c', 'g', 't'):
                for x in (0, 1, 2):
                    param = "%sp%s%i" % (prefix, char, x)
                    input_model.mGrammar.moveVariableToConst(param)

    writeModel(input_model, "input", options)

    t1 = time.time()
    result = xgram.train(input_model, filename)

    if options.dump:
        options.stdlog.write("".join(result.mData))
        options.stdlog.write("".join(result.mLog))
        mali.writeToFile(options.stdlog,
                         format="stockholm",
                         write_ranges=False,
                         options=(tree_options, ))
    t2 = time.time()

    trained_model = result.getModel()
    writeModel(trained_model, "trained", options)

    return result, mali, ids
(options.model, o_ds, o_omega, o_kappa)) ## load a grammar if options.model in ("sn", "akaksgc", "f3x4-four"): if options.model in ("sn", ): infile = open(XGram.PATH_DATA + "/sn.eg", "r") input_model = XGram.Parser.parseGrammar(infile.readlines()) elif options.model in ("akaksgc", ): infile = open(XGram.PATH_DATA + "/akaksgc.eg", "r") input_model = XGram.Parser.parseGrammar(infile.readlines()) elif options.model in ("f3x4-four", ): input_model = Codons.buildCodonML(codon_model=options.model, explicit_extension=True, fix_kappa=options.kappa == None, fix_omega=options.omega == None) ## set codon usage frequencies if options.insert_frequencies: mali = Mali.Mali() mali.readFromFile(open(options.insert_frequencies, "r"), format=options.input_format) if mali.getLength() == 0: raise "refusing to process empty alignment." frequencies = Codons.getFrequenciesPerCodonPosition( map(lambda x: x.mString, mali.values()))
options.stdlog.write("# input parameters: model=%s, ds=%s, omega=%s, kappa=%s\n" % ( options.model, o_ds, o_omega, o_kappa) ) ## load a grammar if options.model in ( "sn" , "akaksgc", "f3x4-four" ): if options.model in ("sn", ): infile = open(XGram.PATH_DATA + "/sn.eg", "r") input_model = XGram.Parser.parseGrammar( infile.readlines() ) elif options.model in ( "akaksgc", ): infile = open(XGram.PATH_DATA + "/akaksgc.eg", "r") input_model = XGram.Parser.parseGrammar( infile.readlines() ) elif options.model in ( "f3x4-four", ): input_model = Codons.buildCodonML(codon_model = options.model, explicit_extension = True, fix_kappa = options.kappa == None, fix_omega = options.omega == None ) ## set codon usage frequencies if options.insert_frequencies: mali = Mali.Mali() mali.readFromFile( open(options.insert_frequencies, "r"), format = options.input_format ) if mali.getLength() == 0: raise "refusing to process empty alignment." frequencies = Codons.getFrequenciesPerCodonPosition( map( lambda x: x.mString, mali.values() ))
def buildAndCheckModel(self, codon_model, **kwargs):
    """build various models checking parameter settings."""
    model = Codons.buildCodonML(codon_model=codon_model, **kwargs)
    self.checkModel(model)

    model = Codons.buildCodonML(codon_model=codon_model,
                                fix_kappa=True,
                                **kwargs)
    self.checkModel(model)

    model = Codons.buildCodonML(codon_model=codon_model,
                                fix_omega=True,
                                **kwargs)
    self.checkModel(model)

    model = Codons.buildCodonML(codon_model=codon_model,
                                fix_omega=True,
                                fix_kappa=True,
                                **kwargs)
    self.checkModel(model)

    model = Codons.buildCodonML(codon_model,
                                num_blocks=2,
                                grammar_type="linear-blocks",
                                shared_frequencies=False,
                                shared_rates=False,
                                **kwargs)
    self.checkModel(model)

    num_blocks = 2
    model = Codons.buildCodonML(codon_model,
                                num_blocks=num_blocks,
                                grammar_type="linear-blocks",
                                shared_frequencies=True,
                                shared_rates=False,
                                **kwargs)
    self.checkModel(model)

    num_blocks = 2
    model = Codons.buildCodonML(codon_model,
                                num_blocks=num_blocks,
                                grammar_type="linear-blocks",
                                shared_frequencies=False,
                                shared_rates=True,
                                **kwargs)
    self.checkModel(model)

    num_blocks = 2
    model = Codons.buildCodonML(codon_model,
                                num_blocks=num_blocks,
                                grammar_type="linear-blocks",
                                shared_frequencies=True,
                                shared_rates=True,
                                **kwargs)
    self.checkModel(model)

    # test model with annotations
    # build annotation
    # string.letters was Python-2-only and locale dependent;
    # string.ascii_uppercase is the portable A-Z alphabet.
    labels = string.ascii_uppercase
    annotate_terminals = {}
    for x in range(num_blocks):
        annotations = []
        key = []
        for c in range(0, 3):
            t = "B%i_COD%i" % (x, c)
            key.append(t)
            annotations.append(Annotation(row="STATE",
                                          column=t,
                                          label=labels[x % len(labels)]))
        annotate_terminals[tuple(key)] = annotations

    model = Codons.buildCodonML(codon_model,
                                num_blocks=2,
                                grammar_type="linear-blocks",
                                shared_frequencies=True,
                                annotate_terminals=annotate_terminals,
                                **kwargs)
    # print model.getGrammar()
    self.checkModel(model)
def test_blocks(codon_models, fix_frequencies, insert_frequencies, data, xgram):
    """module to best codon models: linear versus blocks.

    For each codon model, trains a free (per-block), a fixed-omega and a
    uniform (linear) grammar and prints ka/ks estimates per block.
    """
    t0 = time.time()

    num_blocks = 2

    # build annotation
    # string.letters was Python-2-only and locale dependent;
    # string.ascii_uppercase is the portable A-Z alphabet.
    labels = string.ascii_uppercase
    annotate_terminals = {}
    for x in range(num_blocks):
        annotations = []
        key = []
        for c in range(0, 3):
            t = "B%i_POS%i" % (x, c)
            key.append(t)
            annotations.append(Annotation(row="STATE",
                                          column=t,
                                          label=labels[x % len(labels)]))
        annotate_terminals[tuple(key)] = annotations

    for codon_model in codon_models:
        print("###############################################################")
        print("executing dart/codeml with codon model %s" % codon_model)

        t1 = time.time()
        model_free = Codons.buildCodonML(codon_model=codon_model,
                                         grammar_type="linear-blocks",
                                         annotate_terminals=annotate_terminals,
                                         num_blocks=num_blocks,
                                         fix_frequencies=fix_frequencies)
        if insert_frequencies:
            # was setFrequencies(model_fixed, data): model_fixed does not
            # exist yet at this point (NameError) - the free model is the
            # one being prepared here.
            setFrequencies(model_free, data)

        trained_model_free = xgram.train(model_free, data)
        print(trained_model_free.getData())
        t2 = time.time()
        print("calculated free model in %i seconds: Likelihood = %f" %
              (t2 - t1, trained_model_free.getLogLikelihood()))

        t1 = time.time()
        model_fixed = Codons.buildCodonML(codon_model=codon_model,
                                          grammar_type="linear-blocks",
                                          num_blocks=2,
                                          fix_frequencies=fix_frequencies,
                                          fix_omega=True)
        if insert_frequencies:
            setFrequencies(model_fixed, data)
        trained_model_fixed = xgram.train(model_fixed, data)
        t2 = time.time()
        print("calculated fixed model in %i seconds: Likelihood = %f" %
              (t2 - t1, trained_model_fixed.getLogLikelihood()))

        t1 = time.time()
        model_uniform = Codons.buildCodonML(codon_model=codon_model,
                                            grammar_type="linear",
                                            fix_frequencies=fix_frequencies)
        if insert_frequencies:
            setFrequencies(model_uniform, data)
        trained_model_uniform = xgram.train(model_uniform, data)
        t2 = time.time()
        print("calculated uniform model in %i seconds: Likelihood = %f" %
              (t2 - t1, trained_model_uniform.getLogLikelihood()))

        for block_id in range(num_blocks):
            print("## kaks for block %i" % block_id)

            prefix = "B%i_" % block_id
            terminals = ("%sPOS0" % prefix,
                         "%sPOS1" % prefix,
                         "%sPOS2" % prefix)

            vka, vks = calculateKaKs(trained_model_fixed.getModel(),
                                     terminals=terminals,
                                     prefix=prefix,
                                     per_site=True)
            # NOTE(review): per-site values come from the fixed model but
            # are reported with the free model's statistics - confirm intent.
            print("per site - implicit:\tks= %f, ka = %f, ka/ks=%f, n=%i, L=%f" %
                  (vks, vka, vka / vks,
                   trained_model_free.getNumIterations(),
                   trained_model_free.getLogLikelihood()))

            vka, vks = calculateKaKs(trained_model_free.getModel(),
                                     terminals=terminals,
                                     prefix=prefix)
            print("per codon - w variable:\tks= %f, ka = %f, ka/ks=%f, n=%i, L=%f" %
                  (vks, vka, vka / vks,
                   trained_model_free.getNumIterations(),
                   trained_model_free.getLogLikelihood()))

            fka, fks = calculateKaKs(trained_model_fixed.getModel(),
                                     terminals=terminals,
                                     prefix=prefix)
            print("per codon - w fixed:\tks= %f, ka = %f, ka/ks=%f, n=%i, L=%f" %
                  (fks, fka, fka / fks,
                   trained_model_fixed.getNumIterations(),
                   trained_model_fixed.getLogLikelihood()))

            # normalize vks, vka, fks and fka
            # compute each denominator once: the original divided the
            # second value by an already-normalized first value.
            total = fks + fka
            fks /= total
            fka /= total
            total = vks + vka
            vks /= total
            vka /= total

            ks = vks / (3 * fks)
            ka = vka / (3 * fka)

            print("per site - explicit:\tks= %f, ka = %f, ka/ks=%f" %
                  (ks, ka, ka / ks))

    print('test finished in %i seconds.' % (time.time() - t0))
def test_codon_models(codon_models, fix_frequencies, insert_frequencies, data, xgram):
    """module to compare codon models.

    For each codon model, trains a free-omega and a fixed-omega grammar
    and prints per-codon and per-site ka/ks estimates.
    """
    t0 = time.time()

    for codon_model in codon_models:
        print("###############################################################")
        print("executing dart/codeml with codon model %s" % codon_model)

        generated_model = Codons.buildCodonML(codon_model=codon_model,
                                              fix_frequencies=fix_frequencies)
        if insert_frequencies:
            setFrequencies(generated_model, data)

        t1 = time.time()
        trained_model = xgram.train(generated_model, data)
        t2 = time.time()

        vka, vks = calculateKaKs(trained_model.getModel(), per_site=True)
        print("per site - implicit:\tks= %f, ka = %f, ka/ks=%f, n=%i, L=%f" %
              (vks, vka, vka / vks,
               trained_model.getNumIterations(),
               trained_model.getLogLikelihood()))

        vka, vks = calculateKaKs(trained_model.getModel())
        print("per codon - w variable:\tks= %f, ka = %f, ka/ks=%f, n=%i, L=%f" %
              (vks, vka, vka / vks,
               trained_model.getNumIterations(),
               trained_model.getLogLikelihood()))

        model_fixed = Codons.buildCodonML(codon_model=codon_model,
                                          fix_frequencies=fix_frequencies,
                                          fix_omega=True)
        if insert_frequencies:
            setFrequencies(model_fixed, data)

        t3 = time.time()
        trained_model_fixed = xgram.train(model_fixed, data)
        t4 = time.time()

        fka, fks = calculateKaKs(trained_model_fixed.getModel())
        print("per codon - w fixed:\tks= %f, ka = %f, ka/ks=%f, n=%i, L=%f" %
              (fks, fka, fka / fks,
               trained_model_fixed.getNumIterations(),
               trained_model_fixed.getLogLikelihood()))

        # normalize vks, vka, fks and fka
        # compute each denominator once: the original divided the second
        # value by an already-normalized first value.
        total = fks + fka
        fks /= total
        fka /= total
        total = vks + vka
        vks /= total
        vka /= total

        ks = vks / (3 * fks)
        ka = vka / (3 * fka)

        print("per site - explicit:\tks= %f, ka = %f, ka/ks=%f" %
              (ks, ka, ka / ks))
        print("execution time: %i %i %i" % (t2 - t1, t4 - t3, t4 - t1))

    print('dart/codeml finished in %i seconds.' % (time.time() - t0))
def prepareGrammar(xgram, mali, tree, map_old2new, blocks, options):
    """prepare grammar for custom grammars.

    Builds an annotated multi-block codon grammar, writes the (clipped)
    alignment plus tree to a temporary stockholm file and trains the
    grammar on it with xgram.

    Returns the tuple ``(result, mali, ids)``.
    """
    # list comprehension instead of map(): identical in Python 2, and
    # keeps len()/indexing working under Python 3 as well
    labels = [x[1] for x in blocks]
    nblocks = len(blocks)

    # per-block annotations mapping terminal names to block labels
    annotate_terminals = {}
    for x in range(len(labels)):
        annotations = []
        key = []
        for c in range(0, 3):
            t = "B%i_COD%i" % (x, c)
            key.append(t)
            annotations.append(
                Annotation(row="STATE", column=t, label=labels[x]))
        annotate_terminals[tuple(key)] = annotations

    input_model = Codons.buildCodonML(
        codon_model="f3x4-fourproducts",
        num_blocks=nblocks,
        grammar_type="linear-blocks",
        annotate_terminals=annotate_terminals,
        shared_frequencies=options.shared_frequencies,
        shared_rates=False,
    )

    # manually share rates between blocks: rename the per-block rate
    # parameters to one shared name
    shared_rate_sets = {
        "kappa": ("Ri", "Rv"),
        "kappa-ds": ("Ri", "Rv", "Rs"),
        "omega": ("Rs", "Rn"),
        "omega-ds": ("Rv", "Rs", "Rn"),
        "ds": ("Rs",),
        "all": ("Rv", "Rs", "Rn", "Ri"),
    }
    if options.shared_rates in shared_rate_sets:
        for c in range(0, nblocks):
            for rate in shared_rate_sets[options.shared_rates]:
                input_model.renameParameter("B%i_%s" % (c, rate), rate)

    writeModel(input_model, "input", options)

    ids = mali.getIdentifiers()

    # NOTE(review): the temporary file is never removed - confirm whether
    # callers rely on it before adding cleanup.
    fh, filename = tempfile.mkstemp()
    os.close(fh)
    outfile = open(filename, "w")

    # clip mali by supplied blocks
    mali.clipByAnnotation("STATE", "".join(labels))

    if tree:
        tree.rescaleBranchLengths(1.0)
        tree_options = "#=GF NH %s" % tree.to_string(
            branchlengths_only=True, format="nh")
    elif mali.getNumSequences() == 2:
        tree_options = "#=GF NH (%s:1.0)%s;" % tuple(map_old2new.values())
    else:
        # was `raise "Please supply a tree."` - raising a plain string is
        # a TypeError in modern Python (string exceptions were removed)
        raise ValueError("Please supply a tree.")

    mali.writeToFile(outfile,
                     format="stockholm",
                     write_ranges=False,
                     options=(tree_options, ))
    outfile.close()

    # prefix, code
    if options.shared_frequencies:
        frequency_codes = (("", ""), )
    else:
        frequency_codes = blocks

    if options.insert_frequencies:
        # estimate frequencies per block from the block's sub-alignment
        for prefix, code in frequency_codes:
            temp_mali = mali.getClone()
            temp_mali.clipByAnnotation("STATE", code)
            RateEstimation.setFrequencies(input_model, temp_mali, prefix)

    if options.fix_frequencies:
        # turn frequency parameters into constants so they are not trained
        for prefix, code in frequency_codes:
            for char in ('a', 'c', 'g', 't'):
                for x in (0, 1, 2):
                    param = "%sp%s%i" % (prefix, char, x)
                    input_model.mGrammar.moveVariableToConst(param)

    writeModel(input_model, "input", options)

    t1 = time.time()
    result = xgram.train(input_model, filename)

    if options.dump:
        options.stdlog.write("".join(result.mData))
        options.stdlog.write("".join(result.mLog))
        mali.writeToFile(options.stdlog,
                         format="stockholm",
                         write_ranges=False,
                         options=(tree_options,))
    t2 = time.time()

    trained_model = result.getModel()
    writeModel(trained_model, "trained", options)

    return result, mali, ids