Exemplo n.º 1
0
def buildCodonML(codon_model="f3x4-four",
                 grammar_type="linear",
                 num_blocks=2,
                 explicit_extension=False,
                 fix_omega=False,
                 fix_kappa=False,
                 fix_frequencies=False,
                 shared_frequencies=False,
                 shared_rates=False,
                 annotate_terminals=None,
                 codon_frequencies=None):
    """Assemble a codonml-like model (DNA alphabet plus codon grammar).

    codon_model: parameterization handed to the codon chain generator.
        f3x4-two: two parameter parameterization (a la CodonML).
            Codon frequencies are given by the nucleotide frequencies
            at each position.
        f3x4-four: four parameter parameterization (better convergence
            properties). Codon frequencies are given by the nucleotide
            frequencies at each position.
        codons-four: four parameter parameterization with explicitly
            given codon frequencies. These are added as constant
            expressions into the grammar.

    grammar_type: layout of the grammar, one of
        linear: simple linear sequence with gaps (a single chain).
        linear-blocks: blocks of linear sequence (one chain per block).

    num_blocks: number of blocks; only read for "linear-blocks".

    explicit_extension: use an explicit parameter for the markov chain;
        only read for the "linear" grammar.

    fix_omega: if true, omega is set to 1. The model then does not
        distinguish between synonymous and non-synonymous transitions.
        Use this option to estimate the number of synonymous and
        non-synonymous sites. (The original rate parameters are per codon.)

    fix_kappa: if true, kappa is set to 1. The model then does not
        estimate the transition/transversion ratio.

    fix_frequencies: if true, the codon frequencies are set to const and
        are not estimated. The corresponding rate parameters should be
        added afterwards.

    shared_frequencies: share the nucleotide frequencies between blocks.
        The default (false) has separate frequencies for each block.
        Only read for "linear-blocks".

    shared_rates: share the rates between blocks. The default (false)
        has separate rates for each block. Only read for "linear-blocks".

    annotate_terminals: the linear-blocks grammar allows to annotate the
        input alignments. Provide a hash mapping the emitted states to
        annotations. Only read for "linear-blocks".

    codon_frequencies: forwarded unchanged to every codon chain generator.

    Returns the XGram.Model.Model holding the alphabet and the grammar
    (named "codonML").

    Raises XGram.Exceptions.UsageError if grammar_type is unknown.
    """

    model = XGram.Model.Model()
    grammar = XGram.Model.Grammar()
    alphabet = XGram.Model.Alphabet()

    def add_codon_chain(**block_options):
        # Create one chain, let the codon generator fill it and register
        # it with the grammar. block_options carries the settings that
        # exist only for block grammars.
        new_chain = XGram.Model.Chain()
        ChainCodonML(fix_omega=fix_omega,
                     fix_kappa=fix_kappa,
                     fix_frequencies=fix_frequencies,
                     codon_model=codon_model,
                     codon_frequencies=codon_frequencies,
                     **block_options).buildGrammar(new_chain)
        grammar.addChain(new_chain)

    if grammar_type == "linear":
        add_codon_chain()
        grammar_builder = GrammarLinearSequence(
            explicit_extension=explicit_extension)

    elif grammar_type == "linear-blocks":
        for block in range(num_blocks):
            # NOTE: %x formats the block index in hexadecimal, so the
            # eleventh block gets the prefix "Ba_".
            add_codon_chain(shared_frequencies=shared_frequencies,
                            shared_rates=shared_rates,
                            prefix="B%x_" % block)
        grammar_builder = GrammarLinearSequenceBlocks(
            annotate_terminals=annotate_terminals)

    else:
        raise XGram.Exceptions.UsageError('unknown grammar %s' % grammar_type)

    grammar_builder.buildGrammar(grammar)
    grammar.setName("codonML")

    AlphabetDNA().buildGrammar(alphabet)

    model.setAlphabet(alphabet)
    model.setGrammar(grammar)

    return model


    
Exemplo n.º 2
0
Arquivo: DNA.py Projeto: B-Rich/dart
def buildModel( substitution_model = "jc69",
                grammar_type = "linear",
                num_blocks = 1,
                shared_frequencies = False,
                shared_rates = False,
                annotate_terminals = False,
                explicit_extension = False,
                ):
    """Build a standard nucleotide substitution model.

    substitution_model: one of
        jc69: Jukes-Cantor model
        k80: Kimura 2-parameter model
        gtr: general time reversible model (9 parameters)
        rev: general 12 parameter model

    grammar_type: one of "linear", "linear-blocks", "multiple-blocks".
        linear: simple linear sequence with gaps
        linear-blocks: blocks of linear sequence
        multiple-blocks: chains are built exactly as for linear-blocks,
            but the grammar is generated by
            GrammarLinearSequenceMultipleChains.

    num_blocks: the number of blocks (chains) for the block grammars.

    shared_frequencies: the nucleotide frequencies are shared between blocks.
        The default (false) has separate nucleotide frequencies for each block.

    shared_rates: the rates between blocks are shared. The default (false)
        has separate rates for each block.

    annotate_terminals:
        The block grammars allow to annotate the input alignments.
        Provide a hash mapping the emitted states to annotations.

    explicit_extension: add an explicit extension probability
        (called ext and not_ext) to the grammar. Only used by the
        "linear" grammar.

    Returns the XGram.Model.Model holding the DNA alphabet and the
    grammar, which is named after substitution_model.

    Raises XGram.Exceptions.UsageError if substitution_model or
    grammar_type is unknown.
    """

    model = XGram.Model.Model()
    grammar = XGram.Model.Grammar()
    alphabet = XGram.Model.Alphabet()

    if substitution_model in ("jc69", "rev"):
        # "rev" uses the same constant generators as jc69; it differs
        # only in the update policy set on each chain below.
        generator_initial_states = InitialStatesConst
        generator_transitions = TransitionsConst
    elif substitution_model == "k80":
        generator_initial_states = InitialStatesConst
        generator_transitions = TransitionsK80
    elif substitution_model == "gtr":
        generator_initial_states = InitialStatesParametric
        generator_transitions = TransitionsGTR
    else:
        # Same fully qualified exception class as the grammar_type check.
        raise XGram.Exceptions.UsageError( "model %s unknown" % substitution_model )

    use_rev_policy = substitution_model == "rev"

    if grammar_type == "linear":
        chain = XGram.Model.Chain()
        generator_chain = ChainDNA( generator_initial_states(),
                                    generator_transitions() )
        generator_chain.buildGrammar( chain )

        if use_rev_policy:
            chain.setUpdatePolicy( "rev" )

        grammar.addChain( chain )
        generator_grammar = GrammarLinearSequence( explicit_extension = explicit_extension )

    elif grammar_type in ("linear-blocks", "multiple-blocks"):
        for x in range( num_blocks ):
            chain = XGram.Model.Chain()
            # NOTE: %x formats the block index in hexadecimal.
            prefix = "B%x" % x
            generator_chain = ChainDNA( generator_initial_states( shared_rates = shared_rates,
                                                                  shared_frequencies = shared_frequencies,
                                                                  prefix = prefix ),
                                        generator_transitions( shared_rates = shared_rates,
                                                               shared_frequencies = shared_frequencies,
                                                               prefix = prefix ),
                                        prefix = prefix )
            generator_chain.buildGrammar( chain )

            # Set the policy after buildGrammar, as in the linear branch,
            # so that the chain generator cannot override it.
            if use_rev_policy:
                chain.setUpdatePolicy( "rev" )

            grammar.addChain( chain )

        if grammar_type == "multiple-blocks":
            generator_grammar = GrammarLinearSequenceMultipleChains( annotate_terminals = annotate_terminals )
        else:
            generator_grammar = GrammarLinearSequenceBlocks( annotate_terminals = annotate_terminals )
    else:
        raise XGram.Exceptions.UsageError( 'unknown grammar %s' % grammar_type )

    generator_grammar.buildGrammar( grammar )
    grammar.setName( substitution_model )

    # Record the build settings inside the grammar itself.
    grammar.addComment( "grammar built by DNA.py " )
    grammar.addComment( "  shared rates = %s" % shared_rates)
    grammar.addComment( "  shared frequencies = %s" % shared_frequencies)
    grammar.addComment( "  type = %s" % grammar_type)
    grammar.addComment( "  nblocks = %i" % num_blocks)
    grammar.addComment( "  substitution model = %s" % substitution_model)

    generator_alphabet = AlphabetDNA()
    generator_alphabet.buildGrammar(alphabet)

    model.setAlphabet( alphabet )
    model.setGrammar( grammar )

    return model
Exemplo n.º 3
0
def buildCodonML(codon_model="f3x4-four",
                 grammar_type="linear",
                 num_blocks=2,
                 explicit_extension=False,
                 fix_omega=False,
                 fix_kappa=False,
                 fix_frequencies=False,
                 shared_frequencies=False,
                 shared_rates=False,
                 annotate_terminals=None,
                 codon_frequencies=None):
    """Create and return a codonml-like model.

    The model combines the DNA alphabet with a grammar named "codonML"
    that contains one codon chain ("linear") or num_blocks codon chains
    ("linear-blocks").

    Chain options (passed to every codon chain generator):
        codon_model: one of
            f3x4-two: two parameter parameterization (a la CodonML).
                Codon frequencies are given by the nucleotide
                frequencies at each position.
            f3x4-four: four parameter parameterization (better
                convergence properties). Codon frequencies are given
                by the nucleotide frequencies at each position.
            codons-four: four parameter parameterization with
                explicitly given codon frequencies. These are added as
                constant expressions into the grammar.
        fix_omega: if true, omega is set to 1, so the model does not
            distinguish between synonymous and non-synonymous
            transitions. Use this option to estimate the number of
            synonymous and non-synonymous sites. (The original rate
            parameters are per codon.)
        fix_kappa: if true, kappa is set to 1, so the model does not
            estimate the transition/transversion ratio.
        fix_frequencies: if true, frequencies of the codons are set to
            const and are not estimated. The corresponding rate
            parameters should be added afterwards.
        codon_frequencies: forwarded unchanged to the chain generator.

    Grammar options:
        grammar_type: "linear" (simple linear sequence with gaps) or
            "linear-blocks" (blocks of linear sequence).
        explicit_extension: use an explicit parameter for the markov
            chain ("linear" only).
        num_blocks: the number of blocks ("linear-blocks" only).
        shared_frequencies: the nucleotide frequencies are shared
            between blocks; the default (false) has separate
            frequencies for each block ("linear-blocks" only).
        shared_rates: the rates are shared between blocks; the default
            (false) has separate rates for each block
            ("linear-blocks" only).
        annotate_terminals: hash mapping the emitted states to
            annotations, used to annotate the input alignments
            ("linear-blocks" only).

    Raises XGram.Exceptions.UsageError for any other grammar_type.
    """

    model = XGram.Model.Model()
    grammar = XGram.Model.Grammar()
    alphabet = XGram.Model.Alphabet()

    # Settings that every chain generator receives.
    common_options = {
        "fix_omega": fix_omega,
        "fix_kappa": fix_kappa,
        "fix_frequencies": fix_frequencies,
        "codon_model": codon_model,
        "codon_frequencies": codon_frequencies,
    }

    # Decide per grammar type which chains to build and which grammar
    # generator finishes the job.
    if grammar_type == "linear":
        options_per_chain = [common_options]
        builder_class = GrammarLinearSequence
        builder_options = {"explicit_extension": explicit_extension}
    elif grammar_type == "linear-blocks":
        # NOTE: the block prefix uses %x, i.e. a hexadecimal block index.
        options_per_chain = [
            dict(common_options,
                 shared_frequencies=shared_frequencies,
                 shared_rates=shared_rates,
                 prefix="B%x_" % block)
            for block in range(num_blocks)
        ]
        builder_class = GrammarLinearSequenceBlocks
        builder_options = {"annotate_terminals": annotate_terminals}
    else:
        raise XGram.Exceptions.UsageError('unknown grammar %s' % grammar_type)

    for chain_options in options_per_chain:
        chain = XGram.Model.Chain()
        ChainCodonML(**chain_options).buildGrammar(chain)
        grammar.addChain(chain)

    builder_class(**builder_options).buildGrammar(grammar)
    grammar.setName("codonML")

    AlphabetDNA().buildGrammar(alphabet)

    model.setAlphabet(alphabet)
    model.setGrammar(grammar)

    return model
Exemplo n.º 4
0
def buildModel(
    substitution_model="jc69",
    grammar_type="linear",
    num_blocks=1,
    shared_frequencies=False,
    shared_rates=False,
    annotate_terminals=False,
    explicit_extension=False,
):
    """Build a standard nucleotide substitution model.

    substitution_model: one of
        jc69: Jukes-Cantor model
        k80: Kimura 2-parameter model
        gtr: general time reversible model (9 parameters)
        rev: general 12 parameter model

    grammar_type: one of "linear", "linear-blocks", "multiple-blocks".
        linear: simple linear sequence with gaps
        linear-blocks: blocks of linear sequence
        multiple-blocks: chains are built exactly as for linear-blocks,
            but the grammar is generated by
            GrammarLinearSequenceMultipleChains.

    num_blocks: the number of blocks (chains) for the block grammars.

    shared_frequencies: the nucleotide frequencies are shared between blocks.
        The default (false) has separate nucleotide frequencies for each block.

    shared_rates: the rates between blocks are shared. The default (false)
        has separate rates for each block.

    annotate_terminals:
        The block grammars allow to annotate the input alignments.
        Provide a hash mapping the emitted states to annotations.

    explicit_extension: add an explicit extension probability
        (called ext and not_ext) to the grammar. Only used by the
        "linear" grammar.

    Returns the XGram.Model.Model holding the DNA alphabet and the
    grammar, which is named after substitution_model.

    Raises XGram.Exceptions.UsageError if substitution_model or
    grammar_type is unknown.
    """

    model = XGram.Model.Model()
    grammar = XGram.Model.Grammar()
    alphabet = XGram.Model.Alphabet()

    if substitution_model in ("jc69", "rev"):
        # "rev" uses the same constant generators as jc69; it differs
        # only in the update policy set on each chain below.
        generator_initial_states = InitialStatesConst
        generator_transitions = TransitionsConst
    elif substitution_model == "k80":
        generator_initial_states = InitialStatesConst
        generator_transitions = TransitionsK80
    elif substitution_model == "gtr":
        generator_initial_states = InitialStatesParametric
        generator_transitions = TransitionsGTR
    else:
        # Same fully qualified exception class as the grammar_type check.
        raise XGram.Exceptions.UsageError(
            "model %s unknown" % substitution_model)

    use_rev_policy = substitution_model == "rev"

    if grammar_type == "linear":
        chain = XGram.Model.Chain()
        generator_chain = ChainDNA(generator_initial_states(),
                                   generator_transitions())
        generator_chain.buildGrammar(chain)

        if use_rev_policy:
            chain.setUpdatePolicy("rev")

        grammar.addChain(chain)
        generator_grammar = GrammarLinearSequence(
            explicit_extension=explicit_extension)

    elif grammar_type in ("linear-blocks", "multiple-blocks"):
        for x in range(num_blocks):
            chain = XGram.Model.Chain()
            # NOTE: %x formats the block index in hexadecimal.
            prefix = "B%x" % x
            generator_chain = ChainDNA(
                generator_initial_states(shared_rates=shared_rates,
                                         shared_frequencies=shared_frequencies,
                                         prefix=prefix),
                generator_transitions(shared_rates=shared_rates,
                                      shared_frequencies=shared_frequencies,
                                      prefix=prefix),
                prefix=prefix)
            generator_chain.buildGrammar(chain)

            # Set the policy after buildGrammar, as in the linear branch,
            # so that the chain generator cannot override it.
            if use_rev_policy:
                chain.setUpdatePolicy("rev")

            grammar.addChain(chain)

        if grammar_type == "multiple-blocks":
            generator_grammar = GrammarLinearSequenceMultipleChains(
                annotate_terminals=annotate_terminals)
        else:
            generator_grammar = GrammarLinearSequenceBlocks(
                annotate_terminals=annotate_terminals)
    else:
        raise XGram.Exceptions.UsageError('unknown grammar %s' % grammar_type)

    generator_grammar.buildGrammar(grammar)
    grammar.setName(substitution_model)

    # Record the build settings inside the grammar itself.
    grammar.addComment("grammar built by DNA.py ")
    grammar.addComment("  shared rates = %s" % shared_rates)
    grammar.addComment("  shared frequencies = %s" % shared_frequencies)
    grammar.addComment("  type = %s" % grammar_type)
    grammar.addComment("  nblocks = %i" % num_blocks)
    grammar.addComment("  substitution model = %s" % substitution_model)

    generator_alphabet = AlphabetDNA()
    generator_alphabet.buildGrammar(alphabet)

    model.setAlphabet(alphabet)
    model.setGrammar(grammar)

    return model