Python PairLikelihood примеры, PairLikelihood Python примеры использования

Пример #1

0

Показать файл

Файл: F84.py Проект: BIGtigr/xgcode

 def testLikelihood(self):
     """
     Smoke test: run both estimation procedures on a small alignment.

     No values are checked; the test only requires that the closed form
     analysis and the numerical optimization finish without raising.
     """
     # a simple (but not completely degenerate) pair of aligned sequences
     first = 'AAAACCCCGGGGTTAA'
     second = 'GAAACCTCGGCGTAAA'
     pair = (first, second)
     # closed form estimates, which are not necessarily the mle
     closed_form = get_closed_form_estimates((first, second))
     d_closed, kappa_closed, pA, pC, pG, pT = closed_form
     closed_model = create_rate_matrix(kappa_closed, (pA, pC, pG, pT))
     closed_log_likelihood = PairLikelihood.get_log_likelihood(
         d_closed, pair, closed_model)
     # estimates found by a numerical optimizer
     objective = Objective((first, second))
     start = list(objective.get_initial_parameters())
     optimum = scipy.optimize.fmin(objective, start, ftol=1e-10, disp=0)
     d_opt, kappa_opt, wC, wG, wT = optimum
     opt_distribution = parameters_to_distribution((wC, wG, wT))
     opt_model = create_rate_matrix(kappa_opt, opt_distribution)
     opt_log_likelihood = PairLikelihood.get_log_likelihood(
         d_opt, pair, opt_model)

Пример #2

0

Показать файл

Файл: 20080819a.py Проект: argriffing/xgcode

def get_response_content(fs):
    """
    Simulate a pair of nucleotide sequences and return them as FASTA text.

    @param fs: object providing the A, C, G and T nucleotide weights
    together with the kappa, distance and length attributes
    @return: the FASTA string of the simulated pair plus a trailing newline
    @raise HandlingError: if the weights sum to zero, if the purine or
    pyrimidine frequency is not positive, or if kappa is too small
    """
    # read the nucleotide weights
    nt_weights = [fs.A, fs.C, fs.G, fs.T]
    # The normalizing constant is computed once.
    # A zero total is reported as a handling error
    # instead of escaping as a ZeroDivisionError.
    total_weight = float(sum(nt_weights))
    if total_weight == 0:
        raise HandlingError("the nucleotide weights must not sum to zero")
    # convert the nucleotide weights to probabilities
    nt_probs = [x / total_weight for x in nt_weights]
    # Assert that the kappa value and the nucleotide
    # probabilities are compatible.
    A, C, G, T = nt_probs
    R = float(A + G)
    Y = float(C + T)
    if R <= 0:
        raise HandlingError("the frequency of a purine must be positive")
    if Y <= 0:
        raise HandlingError("the frequency of a pyrimidine must be positive")
    if fs.kappa <= max(-Y, -R):
        msg_a = "kappa must be greater than max(-R, -Y) "
        msg_b = "where R and Y are the purine and pyrimidine frequencies"
        raise HandlingError(msg_a + msg_b)
    # Create the rate matrix object
    # which is automatically scaled to a rate of 1.0.
    model = F84.create_rate_matrix(fs.kappa, nt_probs)
    # simulate a pair of sequences
    sequence_pair = PairLikelihood.simulate_sequence_pair(
        fs.distance, model, fs.length)
    # Build the FASTA text directly; each entry is one output line.
    lines = [
        ">first", "".join(sequence_pair[0]),
        ">second", "".join(sequence_pair[1]),
    ]
    fasta_text = "\n".join(lines) + "\n"
    # round trip through the alignment object to get its own formatting
    return Fasta.Alignment(StringIO(fasta_text)).to_fasta_string() + "\n"

Пример #3

0

Показать файл

Файл: 20080819a.py Проект: BIGtigr/xgcode

def get_response_content(fs):
    """
    Simulate a pair of nucleotide sequences and return them as FASTA text.

    @param fs: object providing the A, C, G and T nucleotide weights
    together with the kappa, distance and length attributes
    @return: the FASTA string of the simulated pair plus a trailing newline
    @raise HandlingError: if the weights sum to zero, if the purine or
    pyrimidine frequency is not positive, or if kappa is too small
    """
    # read the nucleotide weights
    nt_weights = [fs.A, fs.C, fs.G, fs.T]
    # The normalizing constant is computed once.
    # A zero total is reported as a handling error
    # instead of escaping as a ZeroDivisionError.
    total_weight = float(sum(nt_weights))
    if total_weight == 0:
        raise HandlingError('the nucleotide weights must not sum to zero')
    # convert the nucleotide weights to probabilities
    nt_probs = [x / total_weight for x in nt_weights]
    # Assert that the kappa value and the nucleotide
    # probabilities are compatible.
    A, C, G, T = nt_probs
    R = float(A + G)
    Y = float(C + T)
    if R <= 0:
        raise HandlingError('the frequency of a purine must be positive')
    if Y <= 0:
        raise HandlingError('the frequency of a pyrimidine must be positive')
    if fs.kappa <= max(-Y, -R):
        msg_a = 'kappa must be greater than max(-R, -Y) '
        msg_b = 'where R and Y are the purine and pyrimidine frequencies'
        raise HandlingError(msg_a + msg_b)
    # Create the rate matrix object
    # which is automatically scaled to a rate of 1.0.
    model = F84.create_rate_matrix(fs.kappa, nt_probs)
    # simulate a pair of sequences
    sequence_pair = PairLikelihood.simulate_sequence_pair(
        fs.distance, model, fs.length)
    # Build the FASTA text directly; each entry is one output line.
    lines = [
        '>first', ''.join(sequence_pair[0]),
        '>second', ''.join(sequence_pair[1]),
    ]
    fasta_text = '\n'.join(lines) + '\n'
    # round trip through the alignment object to get its own formatting
    return Fasta.Alignment(StringIO(fasta_text)).to_fasta_string() + '\n'

Пример #4

0

Показать файл

Файл: F84.py Проект: argriffing/xgcode

 def testLikelihood(self):
     """
     Smoke test: exercise the closed form and the numerical estimators.

     The computed likelihoods are not compared to anything; the test
     succeeds when both analyses complete without raising.
     """
     # a simple (but not completely degenerate) pair of aligned sequences
     first = 'AAAACCCCGGGGTTAA'
     second = 'GAAACCTCGGCGTAAA'
     pair = (first, second)
     # analytical estimates, which are not necessarily the mle
     closed_form = get_closed_form_estimates((first, second))
     d_closed, kappa_closed, pA, pC, pG, pT = closed_form
     closed_model = create_rate_matrix(kappa_closed, (pA, pC, pG, pT))
     closed_log_likelihood = PairLikelihood.get_log_likelihood(
         d_closed, pair, closed_model)
     # maximum likelihood estimates from a numerical optimizer
     objective = Objective((first, second))
     start = list(objective.get_initial_parameters())
     optimum = scipy.optimize.fmin(objective, start, ftol=1e-10, disp=0)
     d_opt, kappa_opt, wC, wG, wT = optimum
     opt_distribution = parameters_to_distribution((wC, wG, wT))
     opt_model = create_rate_matrix(kappa_opt, opt_distribution)
     opt_log_likelihood = PairLikelihood.get_log_likelihood(
         d_opt, pair, opt_model)

Пример #5

0

Показать файл

 def __call__(self, branch_length):
     """
     Objective function meant for a one dimensional minimizer.
     @param branch_length: the distance between the two aligned sequences
     @return: the negative log likelihood, or infinity when the branch
     length is negative or no log likelihood is available
     """
     infinity = float('inf')
     # a negative distance is rejected before any likelihood evaluation
     if branch_length < 0:
         return infinity
     value = PairLikelihood.get_log_likelihood(
         branch_length, self.sequence_pair, self.rate_matrix)
     # a missing log likelihood is reported as the worst possible value
     return infinity if value is None else -value

Пример #6

0

Показать файл

Файл: F84.py Проект: argriffing/xgcode

 def __call__(self, theta):
     """
     Evaluate the objective at one point of the parameter space.
     @param theta: the vector of estimated parameters, in the order
     distance, kappa, wC, wG, wT
     @return: the negative log likelihood to be minimized
     """
     # split the vector into its five components
     d, k, c_weight, g_weight, t_weight = theta
     # build the rate matrix implied by kappa and the weights
     distribution = parameters_to_distribution(
         (c_weight, g_weight, t_weight))
     rate_matrix = create_rate_matrix(k, distribution)
     # negate the log likelihood because the caller minimizes
     return -PairLikelihood.get_log_likelihood(
         d, self.sequence_pair, rate_matrix)

Пример #7

0

Показать файл

Файл: 20080709a.py Проект: argriffing/xgcode

 def __call__(self, branch_length):
     """
     Objective function meant for a one dimensional minimizer.
     @param branch_length: the distance between the two aligned sequences
     @return: the negative log likelihood, or infinity when the branch
     length is negative or no log likelihood is available
     """
     worst = float('inf')
     # negative distances are never passed on to the likelihood function
     if branch_length < 0:
         return worst
     value = PairLikelihood.get_log_likelihood(
         branch_length, self.sequence_pair, self.rate_matrix)
     # None from the likelihood function maps to the worst value
     return worst if value is None else -value

Пример #8

0

Показать файл

Файл: F84.py Проект: BIGtigr/xgcode

 def __call__(self, theta):
     """
     Evaluate the objective at one point of the parameter space.
     @param theta: the vector of estimated parameters, in the order
     distance, kappa, wC, wG, wT
     @return: the negative log likelihood to be minimized
     """
     # split the vector into its five components
     d, k, c_weight, g_weight, t_weight = theta
     # build the rate matrix implied by kappa and the weights
     distribution = parameters_to_distribution(
         (c_weight, g_weight, t_weight))
     rate_matrix = create_rate_matrix(k, distribution)
     # the sign is flipped because the optimizer minimizes
     return -PairLikelihood.get_log_likelihood(
         d, self.sequence_pair, rate_matrix)

Пример #9

0

Показать файл

Файл: 20080821a.py Проект: BIGtigr/xgcode

def get_response_content(fs):
    """
    Fit the pairwise model numerically and report the estimates as text.

    @param fs: object whose fasta attribute holds the alignment text
    @return: a multi-line report of the estimates and the log likelihood
    @raise HandlingError: if the input is not a gapless unambiguous
    nucleotide alignment of exactly two sequences
    """
    # parse the alignment
    try:
        aln = Fasta.Alignment(StringIO(fs.fasta))
    except Fasta.AlignmentError as err:
        raise HandlingError('alignment error: ' + str(err))
    # exactly two sequences are required
    if len(aln.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # forcing the alignment to nucleotides must not change the column count
    ncols_before = aln.get_column_count()
    try:
        aln.force_nucleotide()
    except Fasta.AlignmentError as err:
        raise HandlingError('nucleotide alignment error: ' + str(err))
    ncols_after = aln.get_column_count()
    if ncols_before != ncols_after:
        raise HandlingError(
            'expected a gapless unambiguous nucleotide alignment')
    # maximum likelihood estimates from a numerical optimizer
    objective = F84.Objective(aln.sequences)
    start = list(objective.get_initial_parameters())
    optimum = scipy.optimize.fmin(objective, start, ftol=1e-10, disp=0)
    distance, kappa, wC, wG, wT = optimum
    nt_distribution = F84.parameters_to_distribution((wC, wG, wT))
    A, C, G, T = nt_distribution
    rate_matrix = F84.create_rate_matrix(kappa, nt_distribution)
    log_likelihood = PairLikelihood.get_log_likelihood(
        distance, aln.sequences, rate_matrix)
    # one (label, value) pair per line of the response
    report = (
        ('ML distance:', distance),
        ('ML kappa:', kappa),
        ('ML A frequency:', A),
        ('ML C frequency:', C),
        ('ML G frequency:', G),
        ('ML T frequency:', T),
        ('log likelihood:', log_likelihood),
    )
    out = StringIO()
    for label, value in report:
        print >> out, label, value
    # write the response
    return out.getvalue()

Пример #10

0

Показать файл

Файл: 20080821a.py Проект: argriffing/xgcode

def get_response_content(fs):
    """
    Fit the pairwise model numerically and report the estimates as text.

    @param fs: object whose fasta attribute holds the alignment text
    @return: a multi-line report of the estimates and the log likelihood
    @raise HandlingError: if the input is not a gapless unambiguous
    nucleotide alignment of exactly two sequences
    """
    # parse the alignment
    try:
        aln = Fasta.Alignment(StringIO(fs.fasta))
    except Fasta.AlignmentError as err:
        raise HandlingError('alignment error: ' + str(err))
    # exactly two sequences are required
    if len(aln.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # forcing the alignment to nucleotides must not change the column count
    ncols_before = aln.get_column_count()
    try:
        aln.force_nucleotide()
    except Fasta.AlignmentError as err:
        raise HandlingError('nucleotide alignment error: ' + str(err))
    ncols_after = aln.get_column_count()
    if ncols_before != ncols_after:
        raise HandlingError(
            'expected a gapless unambiguous nucleotide alignment')
    # maximum likelihood estimates from a numerical optimizer
    objective = F84.Objective(aln.sequences)
    start = list(objective.get_initial_parameters())
    optimum = scipy.optimize.fmin(objective, start, ftol=1e-10, disp=0)
    distance, kappa, wC, wG, wT = optimum
    nt_distribution = F84.parameters_to_distribution((wC, wG, wT))
    A, C, G, T = nt_distribution
    rate_matrix = F84.create_rate_matrix(kappa, nt_distribution)
    log_likelihood = PairLikelihood.get_log_likelihood(
        distance, aln.sequences, rate_matrix)
    # one (label, value) pair per line of the response
    report = (
        ('ML distance:', distance),
        ('ML kappa:', kappa),
        ('ML A frequency:', A),
        ('ML C frequency:', C),
        ('ML G frequency:', G),
        ('ML T frequency:', T),
        ('log likelihood:', log_likelihood),
    )
    out = StringIO()
    for label, value in report:
        print >> out, label, value
    # write the response
    return out.getvalue()

Пример #11

0

Показать файл

def get_response_content(fs):
    """
    Report the closed form estimates for a pair of aligned sequences.

    @param fs: object whose fasta attribute holds the alignment text
    @return: a multi-line report of the estimates and the log likelihood
    @raise HandlingError: if the input is not a gapless unambiguous
    nucleotide alignment of exactly two sequences
    """
    # parse the alignment from its lines
    try:
        aln = Fasta.Alignment(fs.fasta.splitlines())
    except Fasta.AlignmentError as err:
        raise HandlingError('alignment error: ' + str(err))
    # exactly two sequences are required
    if len(aln.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # forcing the alignment to nucleotides must not change the column count
    ncols_before = aln.get_column_count()
    try:
        aln.force_nucleotide()
    except Fasta.AlignmentError as err:
        raise HandlingError('nucleotide alignment error: ' + str(err))
    ncols_after = aln.get_column_count()
    if ncols_before != ncols_after:
        raise HandlingError(
            'expected a gapless unambiguous nucleotide alignment')
    # closed form estimates of the model parameters
    estimates = F84.get_closed_form_estimates(aln.sequences)
    distance, kappa, A, C, G, T = estimates
    # log likelihood under the estimated model
    rate_matrix = F84.create_rate_matrix(kappa, (A, C, G, T))
    log_likelihood = PairLikelihood.get_log_likelihood(
        distance, aln.sequences, rate_matrix)
    # one (label, value) pair per line of the response
    report = (
        ('distance:', distance),
        ('kappa:', kappa),
        ('A frequency:', A),
        ('C frequency:', C),
        ('G frequency:', G),
        ('T frequency:', T),
        ('log likelihood:', log_likelihood),
    )
    out = StringIO()
    for label, value in report:
        print >> out, label, value
    # return the response
    return out.getvalue()

Пример #12

0

Показать файл

Файл: 20080819b.py Проект: argriffing/xgcode

def get_response_content(fs):
    """
    Report the closed form estimates for a pair of aligned sequences.

    @param fs: object whose fasta attribute holds the alignment text
    @return: a multi-line report of the estimates and the log likelihood
    @raise HandlingError: if the input is not a gapless unambiguous
    nucleotide alignment of exactly two sequences
    """
    # parse the alignment from its lines
    try:
        aln = Fasta.Alignment(fs.fasta.splitlines())
    except Fasta.AlignmentError as err:
        raise HandlingError('alignment error: ' + str(err))
    # exactly two sequences are required
    if len(aln.sequences) != 2:
        raise HandlingError('expected a pair of sequences')
    # forcing the alignment to nucleotides must not change the column count
    ncols_before = aln.get_column_count()
    try:
        aln.force_nucleotide()
    except Fasta.AlignmentError as err:
        raise HandlingError('nucleotide alignment error: ' + str(err))
    ncols_after = aln.get_column_count()
    if ncols_before != ncols_after:
        raise HandlingError(
            'expected a gapless unambiguous nucleotide alignment')
    # closed form estimates of the model parameters
    estimates = F84.get_closed_form_estimates(aln.sequences)
    distance, kappa, A, C, G, T = estimates
    # log likelihood under the estimated model
    rate_matrix = F84.create_rate_matrix(kappa, (A, C, G, T))
    log_likelihood = PairLikelihood.get_log_likelihood(
        distance, aln.sequences, rate_matrix)
    # one (label, value) pair per line of the response
    report = (
        ('distance:', distance),
        ('kappa:', kappa),
        ('A frequency:', A),
        ('C frequency:', C),
        ('G frequency:', G),
        ('T frequency:', T),
        ('log likelihood:', log_likelihood),
    )
    out = StringIO()
    for label, value in report:
        print >> out, label, value
    # return the response
    return out.getvalue()

Python PairLikelihood примеры использования