Пример #1
0
def safe_sum_p_log_p(a, base=None):
    """Return the sum of p * log(p) over the nonzero entries of a.

    Zero entries are removed before the logarithm is taken, so they add
    nothing to the total instead of triggering log(0).

    a: array-like of values; it is flattened before use.
    base: optional logarithm base. When falsy (the default), the values
        returned by log() are used unscaled.
    """
    flat_values = ravel(a)
    present = take(flat_values, nonzero(flat_values))
    log_present = log(present)
    if not base:
        return sum(present * log_present)
    # Change of base: log_b(x) = log(x) / log(b).
    log_present /= log(base)
    return sum(present * log_present)
Пример #2
0
def G_fit(obs, exp, williams=1):
    """G test for fit between two lists of counts.

    Usage: test, prob = G_fit(obs, exp, williams)

    obs and exp are two lists of numbers of equal length.
    williams is a boolean stating whether to do the Williams correction.

    Computes SUM(2 f(obs) ln(f(obs)/f(exp))) and returns it together
    with chi_high(G, k - 1), where k is the number of categories.

    Raises ValueError if the lists differ in length or any observed
    value is negative, and ZeroExpectedError if any expected value is
    not strictly positive.

    See Sokal and Rohlf chapter 17.
    """
    k = len(obs)
    if k != len(exp):
        raise ValueError("G_fit requires two lists of equal length.")
    G = 0
    n = 0

    for o, e in zip(obs, exp):
        if o < 0:
            raise ValueError(
                "G_fit requires all observed values to be positive.")
        if e <= 0:
            raise ZeroExpectedError(
                "G_fit requires all expected values to be positive.")
        if o:   # if o is zero, o * log(o/e) must be zero as well.
            # Force true division: with integer counts, o/e would
            # otherwise truncate (e.g. 3/5 == 0) and feed log(0).
            G += o * log(float(o) / e)
            n += o

    G *= 2
    if williams:
        # 6.0 keeps the correction from truncating to zero when k and n
        # are both integers.
        q = 1 + (k + 1) / (6.0 * n)
        G /= q

    return G, chi_high(G, k - 1)
Пример #3
0
def log2(x):
    """Return the base-2 logarithm of x.

    Computed as log(x) divided by the externally defined constant ln_2.

    WARNING: log2(0) gives -inf on some platforms but may raise an error
    (Overflow or ZeroDivision) on others, so downstream code should not
    rely on getting -inf.
    """
    log_of_x = log(x)
    return log_of_x / ln_2
Пример #4
0
def G_ind(m, williams=False):
    """G test for independence in an r x c table.

    m: the table; the original documentation requires a Numeric array.
    williams: when true, G is divided by the Williams correction q.

    Returns G together with chi_high(max(G, 0), df), where
    df = (r - 1) * (c - 1).

    From Sokal and Rohlf p 738.
    """
    # Marginal totals, computed once and reused for the correction.
    default_axis_totals = sum(m)
    axis1_totals = sum(m, 1)
    grand_total = sum(ravel(m))

    cell_term = safe_sum_p_log_p(m)
    default_axis_term = safe_sum_p_log_p(default_axis_totals)
    axis1_term = safe_sum_p_log_p(axis1_totals)
    table_term = grand_total * log(grand_total)

    n_rows = len(m)
    n_cols = len(m[0])
    df = (n_rows - 1) * (n_cols - 1)

    G = 2 * (cell_term - default_axis_term - axis1_term + table_term)
    if williams:
        axis1_factor = grand_total * sum(1.0 / axis1_totals) - 1
        default_axis_factor = grand_total * sum(1.0 / default_axis_totals) - 1
        q = 1 + (axis1_factor * default_axis_factor / (6 * grand_total * df))
        G = G / q
    # A slightly negative G is clamped to 0 before the lookup.
    return G, chi_high(max(G, 0), df)
Пример #5
0
    def reindex(self):
        """Compute the next pair of values and append them.

        Looks at the last self.periods entries of self.series, locates
        the newest entry within that window's min/max range, blends it
        with the previous self.inter value, clamps the blend to
        [-0.99, 0.99], and derives a log-ratio value that is blended with
        the previous element of self.

        If the calculation raises TypeError, IndexError or
        ZeroDivisionError (for example no previous value yet, or a flat
        window), both values fall back to 0.

        Appends the blended value to self.inter and the derived value to
        self; returns nothing.
        """
        window = self.series[-self.periods:]
        latest = window[-1]
        highest = max(window)
        lowest = min(window)
        try:
            # Position of the latest value in the window, centred on 0.
            centred = (latest - lowest) / (highest - lowest) - 0.5
            inter = 0.33 * 2 * centred + (0.67 * self.inter[-1])
            # Keep strictly inside (-1, 1) so the log ratio stays finite.
            inter = 0.99 if inter > 0.99 else (-0.99 if inter < -0.99 else inter)
            ratio = (1 + inter) / (1 - inter)
            fish = 0.5 * log(ratio) + (0.5 * self[-1])
        except (TypeError, IndexError, ZeroDivisionError):
            inter, fish = 0, 0

        self.inter.append(inter)
        self.append(fish)
 def fn(x):
     """Return x * log2(x), with the x == 0 case defined as 0."""
     if x != 0:
         # log(x) / log(2) is the base-2 logarithm of x.
         return x * (log(x) / log(2))
     return 0
Пример #7
0
def G_2_by_2(a, b, c, d, williams=1, directional=1):
    """G test for independence in a 2 x 2 table.

    Usage: G, prob = G_2_by_2(a, b, c, d, williams, directional)

    Cells are in the order:

        a b
        c d

    a, b, c, and d can be int, float, or long.
    williams is a boolean stating whether to do the Williams correction.
    directional is a boolean stating whether the test is 1-tailed.

    Briefly, computes sum(f ln f) for cells - sum(f ln f) for
    rows and columns + f ln f for the table.

    Always has 1 degree of freedom.

    To generalize the test to r x c, use the same protocol:
    2*(cells - rows/cols + table), then with (r-1)(c-1) df.

    Note that G is always positive: to get a directional test,
    the appropriate ratio (e.g. a/b > c/d) must be tested
    as a separate procedure. Find the probability for the
    observed G, and then either halve or halve and subtract from
    one depending on whether the directional prediction was
    upheld.

    The default test is now one-tailed (Rob Knight 4/21/03).

    Returns (0, 1) when the table is empty or a whole row or column is
    empty. When directional is true and the a/b > c/d prediction was not
    upheld, the returned G is negated.

    Raises ValueError if any cell count is negative.

    See Sokal & Rohlf (1995), ch. 17. Specifically, see box 17.6 (p731).
    """
    cells = [a, b, c, d]
    # Validate first: a table such as (1, -1, 0, 0) sums to zero and
    # would otherwise slip through the empty-table shortcut below.
    if min(cells) < 0:
        raise ValueError(
            "G_2_by_2 got negative cell counts(s): must all be >= 0.")
    n = sum(cells)
    # return 0 if table was empty
    if not n:
        return (0, 1)

    G = 0
    # Add x ln x for items, adding zero for items whose counts are zero
    for i in filter(None, cells):
        G += i * log(i)
    # Find totals for rows and cols
    ab = a + b
    cd = c + d
    ac = a + c
    bd = b + d
    rows_cols = [ab, cd, ac, bd]
    # exit if we are missing a row or column entirely: result counts as
    # never significant
    if min(rows_cols) == 0:
        return (0, 1)
    # Subtract x ln x for rows and cols (all are > 0 at this point)
    for i in rows_cols:
        G -= i * log(i)
    # Add x ln x for table
    G += n * log(n)
    # Result needs to be multiplied by 2
    G *= 2

    # apply Williams correction
    if williams:
        # Use a float total so the ratios are not truncated when the
        # cell counts are integers.
        nf = float(n)
        q = 1 + ((nf / ab + nf / cd - 1) * (nf / ac + nf / bd - 1)) / (6 * nf)
        G /= q

    p = chi_high(max(G, 0), 1)

    # find which tail we were in if the test was directional
    if directional:
        # float() forces true division so integer counts compare by
        # their real ratios rather than truncated quotients.
        is_high = (b == 0) or (d != 0 and (float(a) / b > float(c) / d))
        p = tail(p, is_high)
        if not is_high:
            G = -G
    return G, p
Пример #8
0
    def logImage(self):
        """Display the element-wise log of self.data via self.createImage.

        Does nothing when self.fname is the empty string.
        """
        if self.fname == '':
            return
        # Imported lazily, only when there is something to display.
        from Numeric import log
        data = self.data
        da = log(data)
        self.createImage(da)
Пример #9
0
        idx_pairs = take(transform, data[:,0:2].astype(Int32))    
    else:
        idx_pairs = data[:,0:2].astype(Int32)
    #figure out biggest item if not supplied
    if num_items is None:
        num_items = int(max(ravel(idx_pairs))) + 1
    #make result array
    result = zeros((num_items,num_items), Float64)
    if len(data[0]) == 2:
        values = 1
    else:
        values = data[:,2]
    put(ravel(result), idx_pairs[:,0]*num_items+idx_pairs[:,1], values)
    return result

# log(2), computed once; used to convert log() results to base 2.
ln_2 = log(2)


def log2(x):
    """Return the base-2 logarithm of x.

    Computed as log(x) divided by the precomputed constant ln_2.

    WARNING: log2(0) gives -inf on some platforms but may raise an error
    (Overflow or ZeroDivision) on others, so downstream code should not
    rely on getting -inf.
    """
    log_of_x = log(x)
    return log_of_x / ln_2

def safe_p_log_p(a):
    """Returns -(p*log2(p)) for every nonzero p in a.

    a: Numeric array
Пример #10
0
def exct(iline, upper, lower):
    """Return -hp * frq[iline] / kb divided by the log of a weighted ratio.

    The ratio is upper / lower scaled by glo[iline] / gup[iline].
    hp, kb, frq, glo and gup are read from the enclosing scope.
    NOTE(review): this looks like an excitation-temperature formula
    (hp = Planck constant, kb = Boltzmann constant, glo/gup = statistical
    weights) -- confirm against the definitions of those names.

    iline: index into frq, glo and gup.
    upper, lower: the two quantities whose ratio is taken.
    """
    numerator = -1.0 * hp * frq[iline] / kb
    weighted_ratio = upper / lower * glo[iline] / gup[iline]
    denominator = log(weighted_ratio)
    return numerator / denominator
Пример #11
0
 def logImage(self):
     """Display the element-wise log of self.data via self.createImage.

     Does nothing when self.fname is the empty string.
     """
     if self.fname == '':
         return
     # Imported lazily, only when there is something to display.
     from Numeric import log
     logged = log(self.data)
     self.createImage(logged)