Exemplo n.º 1
0
 def test_unequal_log_likelihood(self):
     """Check that the better-fitting model gets the larger log likelihood.

     One sequence pair is scored at distance 1.0 under two F84 rate
     matrices that share a uniform nucleotide distribution and differ only
     in kappa (1.0 versus 0.0).  The kappa=1.0 matrix is expected to give
     the strictly greater log likelihood.
     """
     # define a sequence pair with many more transitions than transversions
     sequence_pair = ('AAAAAAAAAACCCCCCCCCC', 'ATGGGGGGGGCGTTTTTTTT')
     uniform_nt_dist = (0.25, 0.25, 0.25, 0.25)
     more_likely_model = F84.create_rate_matrix(1.0, uniform_nt_dist)
     less_likely_model = F84.create_rate_matrix(0.0, uniform_nt_dist)
     greater_log_likelihood = get_log_likelihood(
         1.0, sequence_pair, more_likely_model)
     smaller_log_likelihood = get_log_likelihood(
         1.0, sequence_pair, less_likely_model)
     # assertTrue replaces failUnless, a deprecated alias that newer
     # unittest releases no longer provide.
     self.assertTrue(greater_log_likelihood > smaller_log_likelihood)
Exemplo n.º 2
0
 def test_unequal_log_likelihood(self):
     """Check that the better-fitting model gets the larger log likelihood.

     One sequence pair is scored at distance 1.0 under two F84 rate
     matrices that share a uniform nucleotide distribution and differ only
     in kappa (1.0 versus 0.0).  The kappa=1.0 matrix is expected to give
     the strictly greater log likelihood.
     """
     # define a sequence pair with many more transitions than transversions
     sequence_pair = ('AAAAAAAAAACCCCCCCCCC', 'ATGGGGGGGGCGTTTTTTTT')
     uniform_nt_dist = (0.25, 0.25, 0.25, 0.25)
     more_likely_model = F84.create_rate_matrix(1.0, uniform_nt_dist)
     less_likely_model = F84.create_rate_matrix(0.0, uniform_nt_dist)
     greater_log_likelihood = get_log_likelihood(1.0, sequence_pair,
                                                 more_likely_model)
     smaller_log_likelihood = get_log_likelihood(1.0, sequence_pair,
                                                 less_likely_model)
     # assertTrue replaces failUnless, a deprecated alias that newer
     # unittest releases no longer provide.
     self.assertTrue(greater_log_likelihood > smaller_log_likelihood)
Exemplo n.º 3
0
def get_response_content(fs):
    """Simulate a pair of nucleotide sequences and return them as FASTA text.

    Reads the nucleotide weights (fs.A, fs.C, fs.G, fs.T) together with
    fs.kappa, fs.distance and fs.length.  The weights are normalized to
    probabilities, an F84 rate matrix is built from them, a sequence pair
    is simulated with it, and the pair is returned in FASTA format followed
    by a newline.

    Raises HandlingError when the weights sum to zero, when the purine or
    pyrimidine frequency is not positive, or when kappa is not greater than
    max(-R, -Y).
    """
    # read the nucleotide weights
    nt_weights = [fs.A, fs.C, fs.G, fs.T]
    # Sum the weights once and reject a zero total up front; dividing by it
    # would otherwise escape as a ZeroDivisionError instead of the
    # HandlingError used for every other invalid input.
    total_weight = float(sum(nt_weights))
    if total_weight == 0:
        raise HandlingError("the nucleotide weights must not sum to zero")
    # convert the nucleotide weights to probabilities
    nt_probs = [x / total_weight for x in nt_weights]
    # Assert that the kappa value and the nucleotide
    # probabilities are compatible.
    A, C, G, T = nt_probs
    R = A + G
    Y = C + T
    if R <= 0:
        raise HandlingError("the frequency of a purine must be positive")
    if Y <= 0:
        raise HandlingError("the frequency of a pyrimidine must be positive")
    if fs.kappa <= max(-Y, -R):
        msg_a = "kappa must be greater than max(-R, -Y) "
        msg_b = "where R and Y are the purine and pyrimidine frequencies"
        raise HandlingError(msg_a + msg_b)
    # Create the rate matrix object
    # which is automatically scaled to a rate of 1.0.
    model = F84.create_rate_matrix(fs.kappa, nt_probs)
    # simulate a pair of sequences
    sequence_pair = PairLikelihood.simulate_sequence_pair(
        fs.distance, model, fs.length)
    # Lay the pair out as FASTA text; joining the lines avoids the
    # Python 2-only "print >> stream" statement.
    fasta_lines = [
        ">first", "".join(sequence_pair[0]),
        ">second", "".join(sequence_pair[1]),
    ]
    fasta_text = "\n".join(fasta_lines) + "\n"
    # round-trip through an alignment object to get its FASTA rendering
    return Fasta.Alignment(StringIO(fasta_text)).to_fasta_string() + "\n"
Exemplo n.º 4
0
def get_response_content(fs):
    """Simulate a pair of nucleotide sequences and return them as FASTA text.

    Reads the nucleotide weights (fs.A, fs.C, fs.G, fs.T) together with
    fs.kappa, fs.distance and fs.length.  The weights are normalized to
    probabilities, an F84 rate matrix is built from them, a sequence pair
    is simulated with it, and the pair is returned in FASTA format followed
    by a newline.

    Raises HandlingError when the weights sum to zero, when the purine or
    pyrimidine frequency is not positive, or when kappa is not greater than
    max(-R, -Y).
    """
    # read the nucleotide weights
    nt_weights = [fs.A, fs.C, fs.G, fs.T]
    # Sum the weights once and reject a zero total up front; dividing by it
    # would otherwise escape as a ZeroDivisionError instead of the
    # HandlingError used for every other invalid input.
    total_weight = float(sum(nt_weights))
    if total_weight == 0:
        raise HandlingError('the nucleotide weights must not sum to zero')
    # convert the nucleotide weights to probabilities
    nt_probs = [x / total_weight for x in nt_weights]
    # Assert that the kappa value and the nucleotide
    # probabilities are compatible.
    A, C, G, T = nt_probs
    R = A + G
    Y = C + T
    if R <= 0:
        raise HandlingError('the frequency of a purine must be positive')
    if Y <= 0:
        raise HandlingError('the frequency of a pyrimidine must be positive')
    if fs.kappa <= max(-Y, -R):
        msg_a = 'kappa must be greater than max(-R, -Y) '
        msg_b = 'where R and Y are the purine and pyrimidine frequencies'
        raise HandlingError(msg_a + msg_b)
    # Create the rate matrix object
    # which is automatically scaled to a rate of 1.0.
    model = F84.create_rate_matrix(fs.kappa, nt_probs)
    # simulate a pair of sequences
    sequence_pair = PairLikelihood.simulate_sequence_pair(
        fs.distance, model, fs.length)
    # Lay the pair out as FASTA text; joining the lines avoids the
    # Python 2-only "print >> stream" statement.
    fasta_lines = [
        '>first', ''.join(sequence_pair[0]),
        '>second', ''.join(sequence_pair[1]),
    ]
    fasta_text = '\n'.join(fasta_lines) + '\n'
    # round-trip through an alignment object to get its FASTA rendering
    return Fasta.Alignment(StringIO(fasta_text)).to_fasta_string() + '\n'
Exemplo n.º 5
0
 def test_basic_log_likelihood(self):
     """Smoke test: the log likelihood call completes on a small input."""
     # F84 rate matrix with kappa 0.5 over a uniform nucleotide distribution
     rate_matrix = F84.create_rate_matrix(0.5, (0.25, 0.25, 0.25, 0.25))
     aligned_pair = ('AAAA', 'ACGT')
     distance = 2.0
     # only the absence of an exception matters, so the value is discarded
     get_log_likelihood(distance, aligned_pair, rate_matrix)
Exemplo n.º 6
0
 def test_basic_log_likelihood(self):
     """Smoke test: the log likelihood call completes on a small input."""
     # F84 rate matrix with kappa 0.5 over a uniform nucleotide distribution
     uniform = (0.25, 0.25, 0.25, 0.25)
     rate_matrix = F84.create_rate_matrix(0.5, uniform)
     # the test passes as long as this call does not raise
     get_log_likelihood(2.0, ('AAAA', 'ACGT'), rate_matrix)
Exemplo n.º 7
0
def get_response_content(fs):
    """Report numerically optimized F84 estimates for a sequence pair.

    fs.fasta is parsed as an alignment that must hold exactly two sequences
    and must keep its column count after being forced to nucleotides.  The
    distance, kappa and nucleotide frequencies are then estimated by
    minimizing an F84.Objective with scipy.optimize.fmin.  The estimates
    and the resulting log likelihood are returned as text, one
    'label value' line per quantity.

    Raises HandlingError when the input is not such an alignment.
    """
    # parse the submitted FASTA text
    try:
        aln = Fasta.Alignment(StringIO(fs.fasta))
    except Fasta.AlignmentError as e:
        raise HandlingError('alignment error: ' + str(e))
    # exactly two sequences are required
    if len(aln.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # forcing to nucleotides must not change the column count, otherwise
    # the input was not a gapless unambiguous nucleotide alignment
    columns_before = aln.get_column_count()
    try:
        aln.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError('nucleotide alignment error: ' + str(e))
    if aln.get_column_count() != columns_before:
        raise HandlingError(
            'expected a gapless unambiguous nucleotide alignment')
    # numerically search for the maximum likelihood parameters
    objective = F84.Objective(aln.sequences)
    start = list(objective.get_initial_parameters())
    optimum = scipy.optimize.fmin(objective, start, ftol=1e-10, disp=0)
    distance, kappa, wC, wG, wT = optimum
    nt_distribution = F84.parameters_to_distribution((wC, wG, wT))
    A, C, G, T = nt_distribution
    # score the fitted model on the same sequence pair
    fitted = F84.create_rate_matrix(kappa, nt_distribution)
    log_likelihood = PairLikelihood.get_log_likelihood(
        distance, aln.sequences, fitted)
    # assemble the response, one labelled value per line
    report = (
        ('ML distance:', distance),
        ('ML kappa:', kappa),
        ('ML A frequency:', A),
        ('ML C frequency:', C),
        ('ML G frequency:', G),
        ('ML T frequency:', T),
        ('log likelihood:', log_likelihood),
    )
    return ''.join('%s %s\n' % entry for entry in report)
Exemplo n.º 8
0
def get_response_content(fs):
    """Report numerically optimized F84 estimates for a sequence pair.

    fs.fasta is parsed as an alignment that must hold exactly two sequences
    and must keep its column count after being forced to nucleotides.  The
    distance, kappa and nucleotide frequencies are then estimated by
    minimizing an F84.Objective with scipy.optimize.fmin.  The estimates
    and the resulting log likelihood are returned as text, one
    'label value' line per quantity.

    Raises HandlingError when the input is not such an alignment.
    """
    # parse the submitted FASTA text
    try:
        aln = Fasta.Alignment(StringIO(fs.fasta))
    except Fasta.AlignmentError as e:
        raise HandlingError('alignment error: ' + str(e))
    # exactly two sequences are required
    if len(aln.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # forcing to nucleotides must not change the column count, otherwise
    # the input was not a gapless unambiguous nucleotide alignment
    columns_before = aln.get_column_count()
    try:
        aln.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError('nucleotide alignment error: ' + str(e))
    columns_after = aln.get_column_count()
    if columns_before != columns_after:
        raise HandlingError(
            'expected a gapless unambiguous nucleotide alignment')
    # numerically search for the maximum likelihood parameters
    objective = F84.Objective(aln.sequences)
    start = list(objective.get_initial_parameters())
    optimum = scipy.optimize.fmin(objective, start, ftol=1e-10, disp=0)
    distance, kappa, wC, wG, wT = optimum
    nt_distribution = F84.parameters_to_distribution((wC, wG, wT))
    A, C, G, T = nt_distribution
    # score the fitted model on the same sequence pair
    fitted = F84.create_rate_matrix(kappa, nt_distribution)
    log_likelihood = PairLikelihood.get_log_likelihood(
        distance, aln.sequences, fitted)
    # assemble the response, one labelled value per line
    labels = (
        'ML distance:', 'ML kappa:',
        'ML A frequency:', 'ML C frequency:',
        'ML G frequency:', 'ML T frequency:',
        'log likelihood:',
    )
    values = (distance, kappa, A, C, G, T, log_likelihood)
    return ''.join('%s %s\n' % pair for pair in zip(labels, values))
Exemplo n.º 9
0
def get_response_content(fs):
    """Report closed-form F84 estimates for a pair of aligned sequences.

    The lines of fs.fasta are parsed as an alignment that must hold exactly
    two sequences and must keep its column count after being forced to
    nucleotides.  Distance, kappa and the nucleotide frequencies come from
    F84.get_closed_form_estimates.  They are returned with the log
    likelihood of the pair under those estimates, as text with one
    'label value' line per quantity.

    Raises HandlingError when the input is not such an alignment.
    """
    # parse the submitted FASTA text
    try:
        aln = Fasta.Alignment(fs.fasta.splitlines())
    except Fasta.AlignmentError as e:
        raise HandlingError('alignment error: ' + str(e))
    # exactly two sequences are required
    if len(aln.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # forcing to nucleotides must not change the column count, otherwise
    # the input was not a gapless unambiguous nucleotide alignment
    columns_before = aln.get_column_count()
    try:
        aln.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError('nucleotide alignment error: ' + str(e))
    if aln.get_column_count() != columns_before:
        raise HandlingError(
            'expected a gapless unambiguous nucleotide alignment')
    # closed-form parameter estimates for the pair
    estimates = F84.get_closed_form_estimates(aln.sequences)
    distance, kappa, A, C, G, T = estimates
    # log likelihood of the pair under the estimated model
    fitted = F84.create_rate_matrix(kappa, (A, C, G, T))
    log_likelihood = PairLikelihood.get_log_likelihood(
        distance, aln.sequences, fitted)
    # assemble the response, one labelled value per line
    report = (
        ('distance:', distance),
        ('kappa:', kappa),
        ('A frequency:', A),
        ('C frequency:', C),
        ('G frequency:', G),
        ('T frequency:', T),
        ('log likelihood:', log_likelihood),
    )
    return ''.join('%s %s\n' % entry for entry in report)
Exemplo n.º 10
0
 def test_basic_simulation(self):
     """Check that simulating a sequence pair works for simple input.

     The result must hold two sequences of the requested length whose
     states are all among the nucleotide letters A, C, G and T.
     """
     kappa = 0.5
     nt_dist = (0.25, 0.25, 0.25, 0.25)
     model = F84.create_rate_matrix(kappa, nt_dist)
     t = 2.0
     n_sites = 100
     result = simulate_sequence_pair(t, model, n_sites)
     self.assertEqual(len(result), 2)
     # compare against n_sites instead of repeating the literal, so the
     # expectation cannot drift from the requested length
     self.assertEqual(len(result[0]), n_sites)
     self.assertEqual(len(result[1]), n_sites)
     result_state_set = set(result[0]+result[1])
     # assertTrue replaces failUnless, a deprecated alias that newer
     # unittest releases no longer provide.
     self.assertTrue(result_state_set <= set('ACGT'))
Exemplo n.º 11
0
 def test_basic_simulation(self):
     """Check that simulating a sequence pair works for simple input.

     The result must hold two sequences of the requested length whose
     states are all among the nucleotide letters A, C, G and T.
     """
     kappa = 0.5
     nt_dist = (0.25, 0.25, 0.25, 0.25)
     model = F84.create_rate_matrix(kappa, nt_dist)
     t = 2.0
     n_sites = 100
     result = simulate_sequence_pair(t, model, n_sites)
     self.assertEqual(len(result), 2)
     # compare against n_sites instead of repeating the literal, so the
     # expectation cannot drift from the requested length
     self.assertEqual(len(result[0]), n_sites)
     self.assertEqual(len(result[1]), n_sites)
     result_state_set = set(result[0] + result[1])
     # assertTrue replaces failUnless, a deprecated alias that newer
     # unittest releases no longer provide.
     self.assertTrue(result_state_set <= set('ACGT'))
Exemplo n.º 12
0
def get_response_content(fs):
    """Report closed-form F84 estimates for a pair of aligned sequences.

    The lines of fs.fasta are parsed as an alignment that must hold exactly
    two sequences and must keep its column count after being forced to
    nucleotides.  Distance, kappa and the nucleotide frequencies come from
    F84.get_closed_form_estimates.  They are returned with the log
    likelihood of the pair under those estimates, as text with one
    'label value' line per quantity.

    Raises HandlingError when the input is not such an alignment.
    """
    # parse the submitted FASTA text
    try:
        aln = Fasta.Alignment(fs.fasta.splitlines())
    except Fasta.AlignmentError as e:
        raise HandlingError('alignment error: ' + str(e))
    # exactly two sequences are required
    if len(aln.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # forcing to nucleotides must not change the column count, otherwise
    # the input was not a gapless unambiguous nucleotide alignment
    columns_before = aln.get_column_count()
    try:
        aln.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError('nucleotide alignment error: ' + str(e))
    columns_after = aln.get_column_count()
    if columns_before != columns_after:
        raise HandlingError(
            'expected a gapless unambiguous nucleotide alignment')
    # closed-form parameter estimates for the pair
    estimates = F84.get_closed_form_estimates(aln.sequences)
    distance, kappa, A, C, G, T = estimates
    # log likelihood of the pair under the estimated model
    fitted = F84.create_rate_matrix(kappa, (A, C, G, T))
    log_likelihood = PairLikelihood.get_log_likelihood(
        distance, aln.sequences, fitted)
    # assemble the response, one labelled value per line
    labels = (
        'distance:', 'kappa:',
        'A frequency:', 'C frequency:',
        'G frequency:', 'T frequency:',
        'log likelihood:',
    )
    values = (distance, kappa, A, C, G, T, log_likelihood)
    return ''.join('%s %s\n' % pair for pair in zip(labels, values))