def buildCodonML(codon_model="f3x4-four",
                 grammar_type="linear",
                 num_blocks=2,
                 explicit_extension=False,
                 fix_omega=False,
                 fix_kappa=False,
                 fix_frequencies=False,
                 shared_frequencies=False,
                 shared_rates=False,
                 annotate_terminals=None,
                 codon_frequencies=None):
    """Assemble a codonml-like model and return it.

    A single codon chain is generated for the "linear" grammar, or
    num_blocks chains (each prefixed "B<hexadecimal block index>_") for
    the "linear-blocks" grammar. The grammar is named "codonML" and the
    model uses the DNA alphabet.

    codon_model:
        passed on to the codon chain generator. Documented values:
        f3x4-two:    two parameter parameterization (a la CodonML).
                     Codon frequencies are given by the nucleotide
                     frequencies at each position.
        f3x4-four:   four parameter parameterization (better convergence
                     properties). Codon frequencies are given by the
                     nucleotide frequencies at each position.
        codons-four: four parameter parameterization with explicitly
                     given codon frequencies. These are added as constant
                     expressions into the grammar.

    grammar_type:
        linear:        simple linear sequence with gaps
        linear-blocks: blocks of linear sequence

    num_blocks:
        the number of blocks (chains) for the "linear-blocks" grammar.
        Not used by the "linear" grammar.

    explicit_extension:
        use an explicit parameter for the markov chain. Only forwarded
        for the "linear" grammar.

    fix_omega:
        if true, omega is set to 1. Thus the model does not distinguish
        between synonymous and non-synonymous transitions. Use this option
        to estimate the number of synonymous and non-synonymous sites.
        (The original rate parameters are per codon.)

    fix_kappa:
        if true, kappa is set to 1. Thus the model does not estimate the
        transition/transversion ratio.

    fix_frequencies:
        if true, frequencies of the codons are set to const and are not
        estimated. The corresponding rate parameters should be added
        afterwards.

    shared_frequencies:
        the nucleotide frequencies are shared between blocks. The default
        (false) has separate nucleotide frequencies for each block. Only
        forwarded for the "linear-blocks" grammar.

    shared_rates:
        the rates between blocks are shared. The default (false) has
        separate rates for each block. Only forwarded for the
        "linear-blocks" grammar.

    annotate_terminals:
        the linear-blocks grammar allows to annotate the input alignments.
        Provide a hash mapping the emitted states to Annotations. Only
        forwarded for the "linear-blocks" grammar.

    codon_frequencies:
        forwarded unchanged to the codon chain generator.

    Raises XGram.Exceptions.UsageError if grammar_type is not one of the
    two supported values.
    """
    model = XGram.Model.Model()
    grammar = XGram.Model.Grammar()
    alphabet = XGram.Model.Alphabet()

    # Settings every codon chain receives, whatever the grammar layout.
    common_options = dict(fix_omega=fix_omega,
                          fix_kappa=fix_kappa,
                          fix_frequencies=fix_frequencies,
                          codon_frequencies=codon_frequencies,
                          codon_model=codon_model)

    if grammar_type == "linear":
        single_chain = XGram.Model.Chain()
        ChainCodonML(**common_options).buildGrammar(single_chain)
        grammar.addChain(single_chain)
        layout = GrammarLinearSequence(explicit_extension=explicit_extension)

    elif grammar_type == "linear-blocks":
        for block_index in range(num_blocks):
            block_chain = XGram.Model.Chain()
            # Block sharing options and the per-block prefix only apply here.
            block_builder = ChainCodonML(shared_frequencies=shared_frequencies,
                                         shared_rates=shared_rates,
                                         prefix="B%x_" % block_index,
                                         **common_options)
            block_builder.buildGrammar(block_chain)
            grammar.addChain(block_chain)
        layout = GrammarLinearSequenceBlocks(
            annotate_terminals=annotate_terminals)

    else:
        raise XGram.Exceptions.UsageError('unknown grammar %s' % grammar_type)

    layout.buildGrammar(grammar)
    grammar.setName("codonML")

    AlphabetDNA().buildGrammar(alphabet)

    model.setAlphabet(alphabet)
    model.setGrammar(grammar)
    return model
def buildModel(substitution_model="jc69",
               grammar_type="linear",
               num_blocks=1,
               shared_frequencies=False,
               shared_rates=False,
               annotate_terminals=False,
               explicit_extension=False):
    """Build a standard nucleotide model and return it.

    substitution_model: can be one of

        jc69:   Jukes-Cantor model
        k80:    Kimura 2-parameter model
        gtr:    general time reversible model (9 parameters)
        rev:    general 12 parameter model. Uses the same constant
                initial states and transitions as jc69; the chain's
                update policy is additionally set to "rev".

    grammar_type: can be one of

        linear:          simple linear sequence with gaps
        linear-blocks:   blocks of linear sequence
        multiple-blocks: chains are built exactly as for "linear-blocks",
                         but the grammar is generated by
                         GrammarLinearSequenceMultipleChains.

    num_blocks:
        the number of blocks (chains) for the block grammars. Each chain
        gets the prefix "B<hexadecimal block index>". Not used by the
        "linear" grammar.

    shared_frequencies:
        the nucleotide frequencies are shared between blocks. The default
        (false) has separate nucleotide frequencies for each block. Only
        forwarded for the block grammars.

    shared_rates:
        the rates between blocks are shared. The default (false) has
        separate rates for each block. Only forwarded for the block
        grammars.

    annotate_terminals:
        the block grammars allow to annotate the input alignments.
        Provide a hash mapping the emitted states to Annotations. Only
        forwarded for the block grammars.

    explicit_extension:
        add an explicit extension probability (called ext and not_ext)
        to the grammar. Only forwarded for the "linear" grammar.

    Raises XGram.Exceptions.UsageError if substitution_model or
    grammar_type is unknown. The substitution model is checked first.
    """
    model = XGram.Model.Model()
    grammar = XGram.Model.Grammar()
    alphabet = XGram.Model.Alphabet()

    # Select the generator classes; they are instantiated per chain below.
    if substitution_model in ("jc69", "rev"):
        generator_initial_states = InitialStatesConst
        generator_transitions = TransitionsConst
    elif substitution_model == "k80":
        generator_initial_states = InitialStatesConst
        generator_transitions = TransitionsK80
    elif substitution_model == "gtr":
        generator_initial_states = InitialStatesParametric
        generator_transitions = TransitionsGTR
    else:
        # Use the fully qualified exception, like every other raise in
        # this module, so the error does not depend on a bare
        # ``Exceptions`` name being bound.
        raise XGram.Exceptions.UsageError(
            "model %s unknown" % substitution_model)

    if grammar_type == "linear":
        chain = XGram.Model.Chain()
        generator_chain = ChainDNA(generator_initial_states(),
                                   generator_transitions())
        generator_chain.buildGrammar(chain)
        if substitution_model == "rev":
            chain.setUpdatePolicy("rev")
        grammar.addChain(chain)
        generator_grammar = GrammarLinearSequence(
            explicit_extension=explicit_extension)

    elif grammar_type in ("linear-blocks", "multiple-blocks"):
        for x in range(num_blocks):
            chain = XGram.Model.Chain()
            prefix = "B%x" % x
            generator_chain = ChainDNA(
                generator_initial_states(
                    shared_rates=shared_rates,
                    shared_frequencies=shared_frequencies,
                    prefix=prefix),
                generator_transitions(
                    shared_rates=shared_rates,
                    shared_frequencies=shared_frequencies,
                    prefix=prefix),
                prefix=prefix)
            # NOTE(review): here the update policy is set before
            # buildGrammar, in the linear branch after it. The original
            # order is kept in both places - confirm it is intended.
            if substitution_model == "rev":
                chain.setUpdatePolicy("rev")
            generator_chain.buildGrammar(chain)
            grammar.addChain(chain)

        if grammar_type == "multiple-blocks":
            generator_grammar = GrammarLinearSequenceMultipleChains(
                annotate_terminals=annotate_terminals)
        else:
            generator_grammar = GrammarLinearSequenceBlocks(
                annotate_terminals=annotate_terminals)

    else:
        raise XGram.Exceptions.UsageError('unknown grammar %s' % grammar_type)

    generator_grammar.buildGrammar(grammar)

    grammar.setName(substitution_model)
    # Record the build settings inside the grammar itself.
    grammar.addComment("grammar built by DNA.py ")
    grammar.addComment(" shared rates = %s" % shared_rates)
    grammar.addComment(" shared frequencies = %s" % shared_frequencies)
    grammar.addComment(" type = %s" % grammar_type)
    grammar.addComment(" nblocks = %i" % num_blocks)
    grammar.addComment(" substitution model = %s" % substitution_model)

    generator_alphabet = AlphabetDNA()
    generator_alphabet.buildGrammar(alphabet)

    model.setAlphabet(alphabet)
    model.setGrammar(grammar)
    return model
def buildCodonML(codon_model="f3x4-four",
                 grammar_type="linear",
                 num_blocks=2,
                 explicit_extension=False,
                 fix_omega=False,
                 fix_kappa=False,
                 fix_frequencies=False,
                 shared_frequencies=False,
                 shared_rates=False,
                 annotate_terminals=None,
                 codon_frequencies=None):
    """Assemble a codonml-like model and return it.

    A single codon chain is generated for the "linear" grammar, or
    num_blocks chains (each prefixed "B<hexadecimal block index>_") for
    the "linear-blocks" grammar. The grammar is named "codonML" and the
    model uses the DNA alphabet.

    codon_model:
        passed on to the codon chain generator. Documented values:
        f3x4-two:    two parameter parameterization (a la CodonML).
                     Codon frequencies are given by the nucleotide
                     frequencies at each position.
        f3x4-four:   four parameter parameterization (better convergence
                     properties). Codon frequencies are given by the
                     nucleotide frequencies at each position.
        codons-four: four parameter parameterization with explicitly
                     given codon frequencies. These are added as constant
                     expressions into the grammar.

    grammar_type:
        linear:        simple linear sequence with gaps
        linear-blocks: blocks of linear sequence

    num_blocks:
        the number of blocks (chains) for the "linear-blocks" grammar.
        Not used by the "linear" grammar.

    explicit_extension:
        use an explicit parameter for the markov chain. Only forwarded
        for the "linear" grammar.

    fix_omega:
        if true, omega is set to 1. Thus the model does not distinguish
        between synonymous and non-synonymous transitions. Use this option
        to estimate the number of synonymous and non-synonymous sites.
        (The original rate parameters are per codon.)

    fix_kappa:
        if true, kappa is set to 1. Thus the model does not estimate the
        transition/transversion ratio.

    fix_frequencies:
        if true, frequencies of the codons are set to const and are not
        estimated. The corresponding rate parameters should be added
        afterwards.

    shared_frequencies:
        the nucleotide frequencies are shared between blocks. The default
        (false) has separate nucleotide frequencies for each block. Only
        forwarded for the "linear-blocks" grammar.

    shared_rates:
        the rates between blocks are shared. The default (false) has
        separate rates for each block. Only forwarded for the
        "linear-blocks" grammar.

    annotate_terminals:
        the linear-blocks grammar allows to annotate the input alignments.
        Provide a hash mapping the emitted states to Annotations. Only
        forwarded for the "linear-blocks" grammar.

    codon_frequencies:
        forwarded unchanged to the codon chain generator.

    Raises XGram.Exceptions.UsageError if grammar_type is not one of the
    two supported values.
    """
    model = XGram.Model.Model()
    grammar = XGram.Model.Grammar()
    alphabet = XGram.Model.Alphabet()

    # Settings every codon chain receives, whatever the grammar layout.
    common_options = dict(fix_omega=fix_omega,
                          fix_kappa=fix_kappa,
                          fix_frequencies=fix_frequencies,
                          codon_frequencies=codon_frequencies,
                          codon_model=codon_model)

    if grammar_type == "linear":
        single_chain = XGram.Model.Chain()
        ChainCodonML(**common_options).buildGrammar(single_chain)
        grammar.addChain(single_chain)
        layout = GrammarLinearSequence(explicit_extension=explicit_extension)

    elif grammar_type == "linear-blocks":
        for block_index in range(num_blocks):
            block_chain = XGram.Model.Chain()
            # Block sharing options and the per-block prefix only apply here.
            block_builder = ChainCodonML(shared_frequencies=shared_frequencies,
                                         shared_rates=shared_rates,
                                         prefix="B%x_" % block_index,
                                         **common_options)
            block_builder.buildGrammar(block_chain)
            grammar.addChain(block_chain)
        layout = GrammarLinearSequenceBlocks(
            annotate_terminals=annotate_terminals)

    else:
        raise XGram.Exceptions.UsageError('unknown grammar %s' % grammar_type)

    layout.buildGrammar(grammar)
    grammar.setName("codonML")

    AlphabetDNA().buildGrammar(alphabet)

    model.setAlphabet(alphabet)
    model.setGrammar(grammar)
    return model
def buildModel(substitution_model="jc69",
               grammar_type="linear",
               num_blocks=1,
               shared_frequencies=False,
               shared_rates=False,
               annotate_terminals=False,
               explicit_extension=False):
    """Build a standard nucleotide model and return it.

    substitution_model: can be one of

        jc69:   Jukes-Cantor model
        k80:    Kimura 2-parameter model
        gtr:    general time reversible model (9 parameters)
        rev:    general 12 parameter model. Uses the same constant
                initial states and transitions as jc69; the chain's
                update policy is additionally set to "rev".

    grammar_type: can be one of

        linear:          simple linear sequence with gaps
        linear-blocks:   blocks of linear sequence
        multiple-blocks: chains are built exactly as for "linear-blocks",
                         but the grammar is generated by
                         GrammarLinearSequenceMultipleChains.

    num_blocks:
        the number of blocks (chains) for the block grammars. Each chain
        gets the prefix "B<hexadecimal block index>". Not used by the
        "linear" grammar.

    shared_frequencies:
        the nucleotide frequencies are shared between blocks. The default
        (false) has separate nucleotide frequencies for each block. Only
        forwarded for the block grammars.

    shared_rates:
        the rates between blocks are shared. The default (false) has
        separate rates for each block. Only forwarded for the block
        grammars.

    annotate_terminals:
        the block grammars allow to annotate the input alignments.
        Provide a hash mapping the emitted states to Annotations. Only
        forwarded for the block grammars.

    explicit_extension:
        add an explicit extension probability (called ext and not_ext)
        to the grammar. Only forwarded for the "linear" grammar.

    Raises XGram.Exceptions.UsageError if substitution_model or
    grammar_type is unknown. The substitution model is checked first.
    """
    model = XGram.Model.Model()
    grammar = XGram.Model.Grammar()
    alphabet = XGram.Model.Alphabet()

    # Select the generator classes; they are instantiated per chain below.
    if substitution_model in ("jc69", "rev"):
        generator_initial_states = InitialStatesConst
        generator_transitions = TransitionsConst
    elif substitution_model == "k80":
        generator_initial_states = InitialStatesConst
        generator_transitions = TransitionsK80
    elif substitution_model == "gtr":
        generator_initial_states = InitialStatesParametric
        generator_transitions = TransitionsGTR
    else:
        # Use the fully qualified exception, like every other raise in
        # this module, so the error does not depend on a bare
        # ``Exceptions`` name being bound.
        raise XGram.Exceptions.UsageError(
            "model %s unknown" % substitution_model)

    if grammar_type == "linear":
        chain = XGram.Model.Chain()
        generator_chain = ChainDNA(generator_initial_states(),
                                   generator_transitions())
        generator_chain.buildGrammar(chain)
        if substitution_model == "rev":
            chain.setUpdatePolicy("rev")
        grammar.addChain(chain)
        generator_grammar = GrammarLinearSequence(
            explicit_extension=explicit_extension)

    elif grammar_type in ("linear-blocks", "multiple-blocks"):
        for x in range(num_blocks):
            chain = XGram.Model.Chain()
            prefix = "B%x" % x
            generator_chain = ChainDNA(
                generator_initial_states(
                    shared_rates=shared_rates,
                    shared_frequencies=shared_frequencies,
                    prefix=prefix),
                generator_transitions(
                    shared_rates=shared_rates,
                    shared_frequencies=shared_frequencies,
                    prefix=prefix),
                prefix=prefix)
            # NOTE(review): here the update policy is set before
            # buildGrammar, in the linear branch after it. The original
            # order is kept in both places - confirm it is intended.
            if substitution_model == "rev":
                chain.setUpdatePolicy("rev")
            generator_chain.buildGrammar(chain)
            grammar.addChain(chain)

        if grammar_type == "multiple-blocks":
            generator_grammar = GrammarLinearSequenceMultipleChains(
                annotate_terminals=annotate_terminals)
        else:
            generator_grammar = GrammarLinearSequenceBlocks(
                annotate_terminals=annotate_terminals)

    else:
        raise XGram.Exceptions.UsageError('unknown grammar %s' % grammar_type)

    generator_grammar.buildGrammar(grammar)

    grammar.setName(substitution_model)
    # Record the build settings inside the grammar itself.
    grammar.addComment("grammar built by DNA.py ")
    grammar.addComment(" shared rates = %s" % shared_rates)
    grammar.addComment(" shared frequencies = %s" % shared_frequencies)
    grammar.addComment(" type = %s" % grammar_type)
    grammar.addComment(" nblocks = %i" % num_blocks)
    grammar.addComment(" substitution model = %s" % substitution_model)

    generator_alphabet = AlphabetDNA()
    generator_alphabet.buildGrammar(alphabet)

    model.setAlphabet(alphabet)
    model.setGrammar(grammar)
    return model