Exemplo n.º 1
0
def get_best_hamming_enrichment_from_counts(w, P, N, pos_counts, neg_counts, print_dists=False):
    """
    Find the most enriched Hamming distance for given counts.
    Returns (best_dist, best_log_pvalue, best_p, best_n)
    """

    # get cumulative counts
    cum_pos_counts = copy.copy(pos_counts)
    cum_neg_counts = copy.copy(neg_counts)
    for i in range(1, len(pos_counts)):
        cum_pos_counts[i] += cum_pos_counts[i-1]
        cum_neg_counts[i] += cum_neg_counts[i-1]

    # compute hypergeometric enrichment at each distance and save best
    best_dist = w
    best_log_pvalue = 1     # infinity
    best_p = 0
    best_n = 0
    for i in range(len(pos_counts)):
        p = cum_pos_counts[i]
        n = cum_neg_counts[i]
        log_pvalue = getLogFETPvalue(p, P, n, N, best_log_pvalue)
        if log_pvalue < best_log_pvalue:
            best_log_pvalue = log_pvalue
            best_dist = i
            best_p = p
            best_n = n
        if print_dists:
            pv_string = sprint_logx(log_pvalue, 1, _pv_format)
            print "d %d : %d %d %d %d %s" % (i, p, P, n, N, pv_string)

    return best_dist, best_log_pvalue, best_p, best_n
def get_best_hamming_enrichment_from_counts(w, P, N, pos_counts, neg_counts, print_dists=False):
    """
    Find the most enriched Hamming distance for given counts.
    Returns (best_dist, best_log_pvalue, best_p, best_n)
    """

    # get cumulative counts
    cum_pos_counts = copy.copy(pos_counts)
    cum_neg_counts = copy.copy(neg_counts)
    for i in range(1, len(pos_counts)):
        cum_pos_counts[i] += cum_pos_counts[i-1]
        cum_neg_counts[i] += cum_neg_counts[i-1]

    # compute hypergeometric enrichment at each distance and save best
    best_dist = w
    best_log_pvalue = 1     # infinity
    best_p = 0
    best_n = 0
    for i in range(len(pos_counts)):
        p = cum_pos_counts[i]
        n = cum_neg_counts[i]
        log_pvalue = getLogFETPvalue(p, P, n, N, best_log_pvalue)
        if log_pvalue < best_log_pvalue:
            best_log_pvalue = log_pvalue
            best_dist = i
            best_p = p
            best_n = n
        if print_dists:
            pv_string = sprint_logx(log_pvalue, 1, _pv_format)
            print "d %d : %d %d %d %d %s" % (i, p, P, n, N, pv_string)

    return best_dist, best_log_pvalue, best_p, best_n
Exemplo n.º 3
0
 def fit_fisher(p, P, n, N):
     #pvalue_threshold = 0.5
     pvalue_threshold = False # Modification to allow p-value above 0.5
     return getLogFETPvalue(p, P, n, N, pvalue_threshold)