Exemplo n.º 1
0
def get_response_content(fs):
    """
    Summarize a serialized mixture model as plain text.
    The report lists the expected rate and then the nucleotide,
    amino acid and codon stationary distributions of the model.
    @param fs: field storage whose 'model' attribute is the serialized model
    @return: the summary text
    @raise HandlingError: when deserializing the model raises a ValueError
    """
    # rebuild the mixture model from the submitted text
    try:
        model = DirectProtein.deserialize_mixture_model(fs.model)
    except ValueError as e:
        raise HandlingError(e)
    # query the model for everything that goes into the report
    rate = model.get_expected_rate()
    codon_props = model.get_codon_stationary_distribution()
    aa_props = model.get_aa_stationary_distribution()
    nt_props = model.get_nt_stationary_distribution()
    # each section is a heading, the labels for its rows, and the row values
    sections = (
            ('nucleotide distribution:', Codon.g_nt_letters, nt_props),
            ('amino acid distribution:', Codon.g_aa_letters, aa_props),
            ('codon distribution:',
                sorted(Codon.g_non_stop_codons), codon_props))
    out = StringIO()
    out.write('expected codon substitution rate:\n')
    out.write('%s\n' % (rate,))
    for heading, labels, values in sections:
        # a blank line separates each section from what precedes it
        out.write('\n')
        out.write(heading + '\n')
        for label, value in zip(labels, values):
            out.write('%s : %s\n' % (label, value))
    return out.getvalue()
Exemplo n.º 2
0
def get_response_content(fs):
    """
    Report the values computed by
    DirectProtein.get_nt_distribution_and_aa_energies for the user input.
    @param fs: field storage with 'nucleotides' and 'aminoacids' attributes
    @return: text with one 'symbol : value' row per nucleotide,
    a blank line, and one row per amino acid
    """
    # parse the two weight tables supplied by the user
    nt_weights = SnippetUtil.get_distribution(
            fs.nucleotides, 'nucleotide', nt_letters)
    aa_weights = SnippetUtil.get_distribution(
            fs.aminoacids, 'amino acid', aa_letters)
    # flatten the tables into lists ordered like nt_letters and aa_letters
    mutation_weights = [nt_weights[nt] for nt in nt_letters]
    aa_weight_list = [aa_weights[aa] for aa in aa_letters]
    nt_stationary, energies = (
            DirectProtein.get_nt_distribution_and_aa_energies(
                mutation_weights, aa_weight_list))
    # assemble the report one line at a time
    rows = ['nucleotide stationary distribution:']
    rows.extend('%s : %s' % (nt, value)
            for nt, value in zip(nt_letters, nt_stationary))
    rows.append('')
    rows.append('amino acid energies:')
    rows.extend('%s : %s' % (aa, value)
            for aa, value in zip(aa_letters, energies))
    # every line, including the last, is newline terminated
    return ''.join(row + '\n' for row in rows)
Exemplo n.º 3
0
def get_response_content(fs):
    """
    Describe a serialized mixture model in plain text.
    @param fs: field storage whose 'model' attribute is the serialized model
    @return: the expected rate followed by the nucleotide, amino acid
    and codon stationary distributions, one 'symbol : value' row each
    @raise HandlingError: when deserializing the model raises a ValueError
    """
    try:
        mixture = DirectProtein.deserialize_mixture_model(fs.model)
    except ValueError as e:
        raise HandlingError(e)
    # gather the quantities to report
    expected_rate = mixture.get_expected_rate()
    codon_values = mixture.get_codon_stationary_distribution()
    aa_values = mixture.get_aa_stationary_distribution()
    nt_values = mixture.get_nt_stationary_distribution()
    codon_labels = sorted(Codon.g_non_stop_codons)
    # the report starts with the rate and continues with three tables
    report = ['expected codon substitution rate:', '%s' % (expected_rate,)]
    tables = [
            ('nucleotide distribution:', Codon.g_nt_letters, nt_values),
            ('amino acid distribution:', Codon.g_aa_letters, aa_values),
            ('codon distribution:', codon_labels, codon_values)]
    for heading, labels, values in tables:
        # an empty line precedes each table heading
        report.append('')
        report.append(heading)
        for label, value in zip(labels, values):
            report.append('%s : %s' % (label, value))
    # terminate every line, including the last one
    return ''.join(line + '\n' for line in report)
Exemplo n.º 4
0
def get_form():
    """
    Define a single multi-line field named 'model' whose default
    value is the sample xml string provided by DirectProtein.
    @return: the body of a form
    """
    sample_xml = DirectProtein.get_sample_xml_string()
    model_field = Form.MultiLine('model', 'mixture model', sample_xml)
    return [model_field]
Exemplo n.º 5
0
def get_form():
    """
    The form has one multi-line 'model' field that is pre-filled
    with the sample xml string from DirectProtein.
    @return: the body of a form
    """
    return [
            Form.MultiLine(
                'model', 'mixture model',
                DirectProtein.get_sample_xml_string()),
            ]
Exemplo n.º 6
0
def get_form():
    """
    Define the tree, model and column count fields.
    @return: the body of a form
    """
    # Parse the default tree and re-format it for display;
    # the 60 is presumably a line width limit.
    newick_source = '(((Human:0.1, Chimpanzee:0.2):0.8, Gorilla:0.3):0.7, Orangutan:0.4, Gibbon:0.5);'
    parsed_tree = Newick.parse(newick_source, Newick.NewickTree)
    narrow_newick = Newick.get_narrow_newick_string(parsed_tree, 60)
    # build the fields in the order they appear in the form
    tree_field = Form.MultiLine('tree', 'newick tree', narrow_newick)
    model_field = Form.MultiLine(
            'model', 'Direct Protein mixture model',
            DirectProtein.get_sample_xml_string().strip())
    ncols_field = Form.Integer(
            'ncols', 'sample this many codon columns',
            100, low=1, high=1000)
    return [tree_field, model_field, ncols_field]
Exemplo n.º 7
0
 def evaluate(self, X):
     """
     Evaluate the objective at a guess and also report the energies.
     @param X: the three vars defining the mutation process nt distribution.
     @return: a pair (evaluation, aa_energies) where the evaluation is a
     tuple of three log ratios which is the zero vector when the guess
     was right.
     @raise ValueError: when X does not have three entries or when the
     normalized mutation weights do not sum to almost one.
     """
     if len(X) != 3:
         raise ValueError("incorrect number of parameters")
     x, y, z = X
     # the first weight is pinned to one; the rest are exp of the inputs
     weights = (1, math.exp(x), math.exp(y), math.exp(z))
     mutation_dist = normalized(weights)
     # sanity check on the normalized weights before using them
     if not almost_equals(sum(mutation_dist), 1.0):
         raise ValueError(
             "detected possibly invalid objective function in put: " + str(X))
     stationary_dist, aa_energies = (
         DirectProtein.get_nt_distribution_and_aa_energies(
             mutation_dist, self.aa_dist))
     # compare the first three target and stationary entries on a log scale
     log_ratios = tuple(
         math.log(target / actual)
         for target, actual in zip(self.nt_dist[:3], stationary_dist[:3]))
     return log_ratios, aa_energies
Exemplo n.º 8
0
 def __call__(self, X):
     """
     Evaluate the objective function at a guess.
     The guess is recorded in self.guesses before it is validated.
     The three variables define the mutation process
     nucleotide distribution.
     @param X: a triple
     @return: a tuple of three log ratios;
     the zero vector when the guess was right
     @raise ValueError: when X does not have three entries
     """
     # keep a history of every guess, including invalid ones
     self.guesses.append(X)
     if len(X) != 3:
         raise ValueError('incorrect number of parameters')
     x, y, z = X
     # the first weight is pinned to one; the rest are exp of the inputs
     weights = (1, math.exp(x), math.exp(y), math.exp(z))
     mutation_dist = normalized(weights)
     # the amino acid energies are computed too but are not returned
     stationary_dist, aa_energies = (
         DirectProtein.get_nt_distribution_and_aa_energies(
             mutation_dist, self.aa_dist))
     log_ratios = [
         math.log(target / actual)
         for target, actual in zip(self.nt_dist[:3], stationary_dist[:3])]
     return tuple(log_ratios)
Exemplo n.º 9
0
def get_response_content(fs):
    """
    Build a DirectProteinRateMatrix from the user input and report
    the single quantity that was requested.
    The request flags are checked in the order srm, urm, cstat, astat,
    nstat, sf and only the first flag that is set is honored.
    @param fs: field storage with 'nucleotides', 'aminoacids', 'kappa'
    and the request flags
    @return: the requested text followed by one extra newline
    """
    # mutation process nucleotide distribution
    nt_to_prob = SnippetUtil.get_distribution(
            fs.nucleotides, 'nucleotide', nt_ordered)
    # selection process amino acid energies
    aa_to_energy = SnippetUtil.get_dictionary(
            fs.aminoacids, 'amino acid', 'energy', aa_ordered)
    # the rate matrix constructor takes lists rather than dictionaries
    nt_probs = [nt_to_prob[nt] for nt in nt_ordered]
    aa_energies = [aa_to_energy[aa] for aa in aa_ordered]
    rate_matrix = DirectProtein.DirectProteinRateMatrix(
            fs.kappa, nt_probs, aa_energies)
    out = StringIO()
    if fs.srm:
        # scaled rate matrix: normalize before extracting the rows
        rate_matrix.normalize()
        rows = rate_matrix.get_row_major_rate_matrix()
        out.write('%s\n' % (MatrixUtil.m_to_string(rows),))
    elif fs.urm:
        # unscaled rate matrix
        rows = rate_matrix.get_row_major_rate_matrix()
        out.write('%s\n' % (MatrixUtil.m_to_string(rows),))
    elif fs.cstat:
        # codon stationary distribution
        codon_to_prob = rate_matrix.get_codon_distribution()
        for codon in codons_ordered:
            out.write('%s : %s\n' % (codon, codon_to_prob[codon]))
    elif fs.astat:
        # amino acid stationary distribution
        aa_to_prob = rate_matrix.get_aa_distribution()
        for aa in aa_ordered:
            out.write('%s : %s\n' % (aa, aa_to_prob[aa]))
    elif fs.nstat:
        # nucleotide stationary distribution
        nt_to_stationary = rate_matrix.get_nt_distribution()
        for nt in nt_ordered:
            out.write('%s : %s\n' % (nt, nt_to_stationary[nt]))
    elif fs.sf:
        # rate matrix scaling factor
        out.write('%s\n' % (rate_matrix.get_expected_rate(),))
    # the response always ends with an additional newline
    return out.getvalue() + '\n'
Exemplo n.º 10
0
 def __call__(self, X):
     """
     Score a guess for the mutation process nucleotide distribution.
     Every guess is appended to self.guesses, even one that is
     subsequently rejected.
     @param X: a triple
     @return: a tuple of three log ratios;
     the zero vector when the guess was right
     @raise ValueError: when X does not have three entries
     """
     self.guesses.append(X)
     if len(X) != 3:
         raise ValueError('incorrect number of parameters')
     x, y, z = X
     # weights relative to the first nucleotide whose weight is fixed at one
     relative_weights = (1, math.exp(x), math.exp(y), math.exp(z))
     guess_dist = normalized(relative_weights)
     result = DirectProtein.get_nt_distribution_and_aa_energies(
         guess_dist, self.aa_dist)
     # only the nucleotide distribution is used; the energies are discarded
     implied_dist, aa_energies = result
     targets = self.nt_dist[:3]
     implied = implied_dist[:3]
     return tuple(math.log(t / s) for t, s in zip(targets, implied))
Exemplo n.º 11
0
 def evaluate(self, X):
     """
     Score a guess and return the amino acid energies along with the score.
     @param X: the three vars defining the mutation process nt distribution.
     @return: a pair (evaluation, aa_energies); the evaluation is a tuple
     of three log ratios which is the zero vector when the guess was right.
     @raise ValueError: when X does not have three entries or when the
     normalized mutation weights do not sum to almost one.
     """
     if len(X) != 3:
         raise ValueError('incorrect number of parameters')
     x, y, z = X
     # weights relative to the first nucleotide whose weight is fixed at one
     relative_weights = (1, math.exp(x), math.exp(y), math.exp(z))
     guess_dist = normalized(relative_weights)
     if not almost_equals(sum(guess_dist), 1.0):
         # report the offending input to help with debugging
         prefix = 'detected possibly invalid objective function in put: '
         raise ValueError(prefix + str(X))
     result = DirectProtein.get_nt_distribution_and_aa_energies(
             guess_dist, self.aa_dist)
     implied_dist, aa_energies = result
     targets = self.nt_dist[:3]
     implied = implied_dist[:3]
     evaluation = tuple(math.log(t / s) for t, s in zip(targets, implied))
     return evaluation, aa_energies
Exemplo n.º 12
0
def get_response_content(fs):
    """
    Simulate a codon alignment on a tree under a mixture model.
    @param fs: field storage with 'tree' (a newick string), 'model'
    (a serialized Direct Protein mixture model) and 'ncols' attributes
    @return: the fasta sequences of the tips of the tree, one per line
    @raise HandlingError: when the tree has a newick syntax error,
    when deserializing the model raises a ValueError,
    or when the simulation raises a SimulationError
    """
    # parse and validate the newick tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(e)
    # Get the normalized Direct Protein mixture model.
    # A ValueError from a malformed model is reported as a HandlingError,
    # the same way the other input errors in this function are reported.
    try:
        mixture_model = DirectProtein.deserialize_mixture_model(fs.model)
    except ValueError as e:
        raise HandlingError(e)
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # collect one fasta sequence per tip of the tree
    sequences = [
            alignment.get_fasta_sequence(tip.name)
            for tip in tree.gen_tips()]
    # return the alignment string
    return '\n'.join(sequences) + '\n'
Exemplo n.º 13
0
def get_response_content(fs):
    """
    Write an html page showing, for each rate matrix of a mixture model,
    the symmetric component of the rate matrix.
    @param fs: field storage whose 'model' attribute is the serialized model
    @return: the html text
    @raise HandlingError: when deserializing the model raises a ValueError
    """
    try:
        mixture_model = DirectProtein.deserialize_mixture_model(fs.model)
    except ValueError as e:
        raise HandlingError(e)
    # Normalize the mixture model to have an expected rate of one
    # substitution per unit of branch length.
    mixture_model.normalize()
    out = StringIO()
    # html header
    header_lines = (
            '<html>',
            '<head>',
            '<style type="text/css">td{font-size:x-small;}</style>',
            '</head>',
            '<body>')
    for line in header_lines:
        out.write(line + '\n')
    # one table per mixture category
    for index, rate_matrix in enumerate(mixture_model.rate_matrices):
        stationary = rate_matrix.get_stationary_distribution()
        rates = rate_matrix.dictionary_rate_matrix
        # pair each codon with its stationary probability
        weighted_codons = list(zip(codons, stationary))
        symmetrized = {}
        for source, p_source in weighted_codons:
            for sink, p_sink in weighted_codons:
                # divide the rate by sqrt(p_sink) / sqrt(p_source)
                scale = math.sqrt(p_sink) / math.sqrt(p_source)
                symmetrized[(source, sink)] = rates[(source, sink)] / scale
        out.write('the symmetric component of the rate matrix\n')
        out.write('for category %d:\n' % (index + 1))
        out.write('<table>\n')
        table_html = RateMatrix.codon_rate_matrix_to_html_string(symmetrized)
        out.write('%s\n' % (table_html,))
        out.write('</table>\n')
        out.write('<br/><br/>\n')
    # html footer
    out.write('</body>\n')
    out.write('</html>\n')
    return out.getvalue()
Exemplo n.º 14
0
def get_response_content(fs):
    """
    Render the symmetric component of each rate matrix of a
    mixture model as a table in an html page.
    @param fs: field storage whose 'model' attribute is the serialized model
    @return: the html text
    @raise HandlingError: when deserializing the model raises a ValueError
    """
    try:
        mixture = DirectProtein.deserialize_mixture_model(fs.model)
    except ValueError as e:
        raise HandlingError(e)
    # Normalize the mixture model to have an expected rate of one
    # substitution per unit of branch length.
    mixture.normalize()
    # the page is accumulated as a list of lines
    page = [
            '<html>',
            '<head>',
            '<style type="text/css">td{font-size:x-small;}</style>',
            '</head>',
            '<body>']
    for category_index, matrix_object in enumerate(mixture.rate_matrices):
        codon_probs = matrix_object.get_stationary_distribution()
        rate_lookup = matrix_object.dictionary_rate_matrix
        codon_prob_pairs = list(zip(codons, codon_probs))
        # each rate is divided by sqrt(pb) / sqrt(pa)
        symmetric = {}
        for ca, pa in codon_prob_pairs:
            for cb, pb in codon_prob_pairs:
                rate = rate_lookup[(ca, cb)]
                symmetric[(ca, cb)] = rate / (math.sqrt(pb) / math.sqrt(pa))
        page.append('the symmetric component of the rate matrix')
        page.append('for category %d:' % (category_index + 1))
        page.append('<table>')
        page.append('%s' % (
            RateMatrix.codon_rate_matrix_to_html_string(symmetric),))
        page.append('</table>')
        page.append('<br/><br/>')
    page.append('</body>')
    page.append('</html>')
    # terminate every line, including the last one
    return ''.join(line + '\n' for line in page)
Exemplo n.º 15
0
def get_form():
    """
    Define a single multi-line 'model' field whose default value is the
    DirectProtein sample xml string stripped of surrounding whitespace.
    @return: the body of a form
    """
    sample_xml = DirectProtein.get_sample_xml_string()
    model_field = Form.MultiLine('model', 'mixture model', sample_xml.strip())
    return [model_field]
Exemplo n.º 16
0
def get_response_content(fs):
    """
    Estimate a codon stationary distribution from nucleotide and
    amino acid distributions, and report it together with the
    nucleotide distribution that it implies.
    When fs.corrected is set the estimate comes from a broyden2 search
    over mutation process nucleotide weights.
    Otherwise when fs.hb is set each codon is weighted directly
    from the input weights.
    When neither is set both sections of the report have no rows.
    @param fs: field storage with 'nucleotides', 'aminoacids',
    'corrected' and 'hb' attributes
    @return: the report text
    """
    nt_to_weight = SnippetUtil.get_distribution(
        fs.nucleotides, 'nucleotide', nt_letters)
    aa_to_weight = SnippetUtil.get_distribution(
        fs.aminoacids, 'amino acid', aa_letters)
    # the same distributions as lists ordered like the letter sequences
    target_nt_dist = [nt_to_weight[nt] for nt in nt_letters]
    target_aa_dist = [aa_to_weight[aa] for aa in aa_letters]
    # these stay empty when no estimation method is selected
    codon_dist = []
    implied_nt_dist = []
    if fs.corrected:
        # search from the origin for 20 iterations
        objective = MyObjective(target_aa_dist, target_nt_dist)
        solution = scipy.optimize.nonlin.broyden2(objective, (0, 0, 0), 20)
        x, y, z = solution
        mutation_dist = normalized(
            (1, math.exp(x), math.exp(y), math.exp(z)))
        # Given the mutation distribution and the amino acid distribution,
        # get the stationary distribution.
        implied_nt_dist, aa_energies = (
            DirectProtein.get_nt_distribution_and_aa_energies(
                mutation_dist, target_aa_dist))
        # kappa doesn't matter because we are only concerned
        # with stationary distributions
        rate_matrix = DirectProtein.DirectProteinRateMatrix(
            1.0, mutation_dist, aa_energies)
        codon_dist = rate_matrix.get_stationary_distribution()
    elif fs.hb:

        def nt_product(codon):
            # product of the input weights of the nucleotides of a codon
            return np.prod([nt_to_weight[nt] for nt in codon])

        raw_codon_weights = []
        for codon in codons:
            aa = Codon.g_codon_to_aa_letter[codon]
            siblings = Codon.g_aa_letter_to_codons[aa]
            aa_weight = aa_to_weight[aa]
            own_product = nt_product(codon)
            sibling_total = sum(nt_product(sibling) for sibling in siblings)
            # share the amino acid weight among its codons
            raw_codon_weights.append(aa_weight * own_product / sibling_total)
        codon_dist = normalized(raw_codon_weights)
        # accumulate codon probability onto each nucleotide it contains
        nt_totals = dict(zip(nt_letters, [0] * 4))
        for codon, p in zip(codons, codon_dist):
            for nt in codon:
                nt_totals[nt] += p
        implied_nt_dist = normalized(nt_totals[nt] for nt in nt_letters)
    # write the report
    out = StringIO()
    out.write('estimated codon stationary distribution:\n')
    for codon, p in zip(codons, codon_dist):
        out.write('%s : %s\n' % (codon, p))
    out.write('\n')
    out.write('implied nucleotide stationary distribution:\n')
    for nt, p in zip(nt_letters, implied_nt_dist):
        out.write('%s : %s\n' % (nt, p))
    return out.getvalue()
Exemplo n.º 17
0
def get_response_content(fs):
    """
    Report an estimated codon stationary distribution and the
    nucleotide stationary distribution that it implies.
    The estimate uses a broyden2 search when fs.corrected is set,
    or else a direct weighting of the codons when fs.hb is set.
    When neither is set the report contains only its two headings.
    @param fs: field storage with 'nucleotides', 'aminoacids',
    'corrected' and 'hb' attributes
    @return: the report text
    """
    # read the user supplied distributions
    nt_to_weight = SnippetUtil.get_distribution(
            fs.nucleotides, 'nucleotide', nt_letters)
    aa_to_weight = SnippetUtil.get_distribution(
            fs.aminoacids, 'amino acid', aa_letters)
    # list forms ordered like nt_letters and aa_letters
    observed_nt = [nt_to_weight[nt] for nt in nt_letters]
    observed_aa = [aa_to_weight[aa] for aa in aa_letters]
    # defaults used when no estimation method is selected
    codon_probs = []
    implied_nt = []
    if fs.corrected:
        # look for mutation weights starting from the origin
        objective = MyObjective(observed_aa, observed_nt)
        start = (0, 0, 0)
        max_iterations = 20
        found = scipy.optimize.nonlin.broyden2(
                objective, start, max_iterations)
        x, y, z = found
        mutation_weights = (1, math.exp(x), math.exp(y), math.exp(z))
        mutation_probs = normalized(mutation_weights)
        # Given the mutation distribution and the amino acid distribution,
        # get the stationary distribution.
        implied_nt, energies = (
                DirectProtein.get_nt_distribution_and_aa_energies(
                    mutation_probs, observed_aa))
        # kappa doesn't matter because we are only concerned
        # with stationary distributions
        kappa = 1.0
        matrix_object = DirectProtein.DirectProteinRateMatrix(
                kappa, mutation_probs, energies)
        codon_probs = matrix_object.get_stationary_distribution()
    elif fs.hb:
        unnormalized = []
        for codon in codons:
            aa = Codon.g_codon_to_aa_letter[codon]
            synonymous = Codon.g_aa_letter_to_codons[aa]
            aa_weight = aa_to_weight[aa]
            # product of the nucleotide weights of this codon
            own = np.prod([nt_to_weight[nt] for nt in codon])
            # the same product summed over every codon of the amino acid
            shared = sum(
                    np.prod([nt_to_weight[nt] for nt in other])
                    for other in synonymous)
            unnormalized.append(aa_weight * own / shared)
        codon_probs = normalized(unnormalized)
        # add each codon probability to every nucleotide in the codon
        tallies = dict(zip(nt_letters, [0] * 4))
        for codon, p in zip(codons, codon_probs):
            for nt in codon:
                tallies[nt] += p
        implied_nt = normalized(tallies[nt] for nt in nt_letters)
    # assemble the report one line at a time
    report = ['estimated codon stationary distribution:']
    for codon, p in zip(codons, codon_probs):
        report.append('%s : %s' % (codon, p))
    report.append('')
    report.append('implied nucleotide stationary distribution:')
    for nt, p in zip(nt_letters, implied_nt):
        report.append('%s : %s' % (nt, p))
    # terminate every line, including the last one
    return ''.join(line + '\n' for line in report)
Exemplo n.º 18
0
    objective_function = MyCodonObjective(aa_distribution, observed_nt_stationary_distribution)
    initial_stationary_guess = halpern_bruno_nt_estimate(nt_to_probability, aa_to_probability)
    A, C, G, T = initial_stationary_guess
    initial_guess = (math.log(C / A), math.log(G / A), math.log(T / A))
    iterations = 20
    try:
        best = scipy.optimize.nonlin.broyden2(objective_function, initial_guess, iterations)
    except Exception, e:
        debugging_information = objective_function.get_history()
        raise HandlingError(str(e) + "\n" + debugging_information)
    x, y, z = best
    best_mutation_weights = (1, math.exp(x), math.exp(y), math.exp(z))
    best_mutation_distribution = normalized(best_mutation_weights)
    # Given the mutation distribution and the amino acid distribution,
    # get the stationary distribution.
    result = DirectProtein.get_nt_distribution_and_aa_energies(best_mutation_distribution, aa_distribution)
    result_stationary_nt_dist, result_aa_energies = result
    # make a results string
    out = StringIO()
    # write the stationary nucleotide distribution of the mutation process
    print >> out, "mutation nucleotide stationary distribution:"
    for nt, probability in zip(nt_letters, best_mutation_distribution):
        print >> out, "%s : %s" % (nt, probability)
    # write the centered amino acid energies
    print >> out, ""
    print >> out, "amino acid energies:"
    for aa, energy in zip(aa_letters, result_aa_energies):
        print >> out, "%s : %s" % (aa, energy)
    # return the response
    return out.getvalue()
Exemplo n.º 19
0
def get_form():
    """
    The form has one multi-line 'model' field that is pre-filled with
    the whitespace-stripped sample xml string from DirectProtein.
    @return: the body of a form
    """
    return [
            Form.MultiLine(
                'model', 'mixture model',
                DirectProtein.get_sample_xml_string().strip()),
            ]
Exemplo n.º 20
0
            aa_to_probability)
    A, C, G, T = initial_stationary_guess
    initial_guess = (math.log(C/A), math.log(G/A), math.log(T/A))
    iterations = 20
    try:
        best = scipy.optimize.nonlin.broyden2(objective_function,
                initial_guess, iterations)
    except Exception, e:
        debugging_information = objective_function.get_history()
        raise HandlingError(str(e) + '\n' + debugging_information)
    x, y, z = best
    best_mutation_weights = (1, math.exp(x), math.exp(y), math.exp(z))
    best_mutation_distribution = normalized(best_mutation_weights)
    # Given the mutation distribution and the amino acid distribution,
    # get the stationary distribution.
    result = DirectProtein.get_nt_distribution_and_aa_energies(
            best_mutation_distribution, aa_distribution)
    result_stationary_nt_dist, result_aa_energies = result
    # make a results string
    out = StringIO()
    # write the stationary nucleotide distribution of the mutation process
    print >> out, 'mutation nucleotide stationary distribution:'
    for nt, probability in zip(nt_letters, best_mutation_distribution):
        print >> out, '%s : %s' % (nt, probability)
    # write the centered amino acid energies
    print >> out, ''
    print >> out, 'amino acid energies:'
    for aa, energy in zip(aa_letters, result_aa_energies):
        print >> out, '%s : %s' % (aa, energy)
    # return the response
    return out.getvalue()