Пример #1
0
def compare_one(col, cons_aa, aln_size, weights, aa_freqs, pseudo_size):
    """Compare column amino acid frequencies to overall via G-test.

    Args:
        col: Column data, forwarded to ``count_col``.
        cons_aa: Accepted but not used by this function.
        aln_size: Accepted but not used by this function.
        weights: Forwarded to ``count_col``.
        aa_freqs: Mapping of amino acid -> overall frequency. Forwarded to
            ``count_col`` and also used as the per-residue denominator of
            the G statistic.
        pseudo_size: Forwarded to ``count_col``.

    Returns:
        Whatever ``chisqprob(G, 19)`` returns (presumably the chi-squared
        p-value of G with 19 degrees of freedom -- confirm against the
        ``chisqprob`` that is in scope).
    """
    observed = count_col(col, weights, aa_freqs, pseudo_size)
    # G = 2 * sum(O * ln(O / E)).
    # * Terms with O == 0 are skipped: x * ln(x) -> 0 as x -> 0, so they
    #   contribute nothing, whereas math.log(0) would raise a domain error.
    # * items() instead of the Python-2-only iteritems() keeps this working
    #   on both Python 2 and Python 3.
    # * A residue absent from aa_freqs falls back to 0.0, so a non-zero
    #   observation for it still fails on the division (typically with
    #   ZeroDivisionError), as before.
    # NOTE(review): O is divided directly by a background *frequency*; this
    # assumes count_col returns values on the same scale -- confirm.
    G = 2 * sum(obsv * math.log(obsv / aa_freqs.get(aa, 0.0))
                for aa, obsv in observed.items()
                if obsv)
    return chisqprob(G, 19)
Пример #2
0
def compare_one(col, cons_aa, aln_size, weights, aa_freqs, pseudo_size):
    """Compare column amino acid frequencies to overall via G-test.

    Args:
        col: Column data, forwarded to ``count_col``.
        cons_aa: Accepted but not used by this function.
        aln_size: Accepted but not used by this function.
        weights: Forwarded to ``count_col``.
        aa_freqs: Mapping of amino acid -> overall frequency. Forwarded to
            ``count_col`` and also used as the per-residue denominator of
            the G statistic.
        pseudo_size: Forwarded to ``count_col``.

    Returns:
        Whatever ``chisqprob(G, 19)`` returns (presumably the chi-squared
        p-value of G with 19 degrees of freedom -- confirm against the
        ``chisqprob`` that is in scope).
    """
    observed = count_col(col, weights, aa_freqs, pseudo_size)
    # G = 2 * sum(O * ln(O / E)).
    # * Terms with O == 0 are skipped: x * ln(x) -> 0 as x -> 0, so they
    #   contribute nothing, whereas math.log(0) would raise a domain error.
    # * items() instead of the Python-2-only iteritems() keeps this working
    #   on both Python 2 and Python 3.
    # * A residue absent from aa_freqs falls back to 0.0, so a non-zero
    #   observation for it still fails on the division (typically with
    #   ZeroDivisionError), as before.
    # NOTE(review): O is divided directly by a background *frequency*; this
    # assumes count_col returns values on the same scale -- confirm.
    G = 2 * sum(obsv * math.log(obsv / aa_freqs.get(aa, 0.0))
                for aa, obsv in observed.items()
                if obsv)
    return chisqprob(G, 19)
Пример #3
0
def compare_cols(fg_col, fg_cons, fg_size, fg_weights,
                 bg_col, bg_cons, bg_size, bg_weights,
                 aa_freqs, pseudo_size):
    """Compare amino acid frequencies between aligned columns via G-test.

    The background column provides the expected values and the foreground
    column provides the observed values.

    Args:
        fg_col: Foreground column data, forwarded to ``count_col``.
        fg_cons: Accepted but not used by this function.
        fg_size: Foreground size; expected values are scaled to it.
        fg_weights: Forwarded to ``count_col`` with ``fg_col``.
        bg_col: Background column data, forwarded to ``count_col``.
        bg_cons: Accepted but not used by this function.
        bg_size: Background size; together with ``pseudo_size`` it
            normalizes the background counts.
        bg_weights: Forwarded to ``count_col`` with ``bg_col``.
        aa_freqs: Forwarded to ``count_col`` for the background only.
        pseudo_size: Forwarded to ``count_col`` for the background only,
            and added to ``bg_size`` in the normalization.

    Returns:
        Whatever ``chisqprob(G, 19)`` returns (presumably the chi-squared
        p-value of G; 19 matches the 20 residue categories minus one).

    Raises:
        KeyError: If the foreground counts contain a non-zero entry for a
            key outside the 20-letter alphabet used for ``expected``.
    """
    # Expected values: background counts normalized by the background total
    # (presumably including pseudocounts added by count_col -- confirm) and
    # rescaled to the foreground size.
    bg_counts = count_col(bg_col, bg_weights, aa_freqs, pseudo_size)
    bg_total = bg_size + pseudo_size  # loop-invariant, computed once
    expected = {}
    for aa in "ACDEFGHIKLMNPQRSTVWY":
        expected[aa] = fg_size * (bg_counts[aa] / bg_total)
    # Observed values come from the foreground alone; no aa_freqs or
    # pseudo_size is passed here.
    observed = count_col(fg_col, fg_weights)
    # G = 2 * sum(O * ln(O / E)).  Terms with O == 0 are skipped because
    # x * ln(x) -> 0 as x -> 0, whereas math.log(0) would raise a domain
    # error.  items() replaces the Python-2-only iteritems() so this runs on
    # both Python 2 and Python 3.
    G = 2 * sum(obsv * math.log(obsv / expected[aa])
                for aa, obsv in observed.items()
                if obsv)
    # Convert G to a chi-squared p-value
    return chisqprob(G, 19)
Пример #4
0
def compare_cols(fg_col, fg_cons, fg_size, fg_weights, bg_col, bg_cons,
                 bg_size, bg_weights, aa_freqs, pseudo_size):
    """Compare amino acid frequencies between aligned columns via G-test.

    The background column provides the expected values and the foreground
    column provides the observed values.

    Args:
        fg_col: Foreground column data, forwarded to ``count_col``.
        fg_cons: Accepted but not used by this function.
        fg_size: Foreground size; expected values are scaled to it.
        fg_weights: Forwarded to ``count_col`` with ``fg_col``.
        bg_col: Background column data, forwarded to ``count_col``.
        bg_cons: Accepted but not used by this function.
        bg_size: Background size; together with ``pseudo_size`` it
            normalizes the background counts.
        bg_weights: Forwarded to ``count_col`` with ``bg_col``.
        aa_freqs: Forwarded to ``count_col`` for the background only.
        pseudo_size: Forwarded to ``count_col`` for the background only,
            and added to ``bg_size`` in the normalization.

    Returns:
        Whatever ``chisqprob(G, 19)`` returns (presumably the chi-squared
        p-value of G; 19 matches the 20 residue categories minus one).

    Raises:
        KeyError: If the foreground counts contain a non-zero entry for a
            key outside the 20-letter alphabet used for ``expected``.
    """
    # Expected values: background counts normalized by the background total
    # (presumably including pseudocounts added by count_col -- confirm) and
    # rescaled to the foreground size.
    bg_counts = count_col(bg_col, bg_weights, aa_freqs, pseudo_size)
    bg_total = bg_size + pseudo_size  # loop-invariant, computed once
    expected = {}
    for aa in 'ACDEFGHIKLMNPQRSTVWY':
        expected[aa] = fg_size * (bg_counts[aa] / bg_total)
    # Observed values come from the foreground alone; no aa_freqs or
    # pseudo_size is passed here.
    observed = count_col(fg_col, fg_weights)
    # G = 2 * sum(O * ln(O / E)).  Terms with O == 0 are skipped because
    # x * ln(x) -> 0 as x -> 0, whereas math.log(0) would raise a domain
    # error.  items() replaces the Python-2-only iteritems() so this runs on
    # both Python 2 and Python 3.
    G = 2 * sum(obsv * math.log(obsv / expected[aa])
                for aa, obsv in observed.items()
                if obsv)
    # Convert G to a chi-squared p-value
    return chisqprob(G, 19)