Exemplo n.º 1
0
def significance(ev1, ev2, cutoff):
    """Return the (cached) fexact() result for the table built from two evidence lists.

    Every element of ev1 and ev2 is read as item[0] (an option label, used as
    a dict key) and item[1] (the value stored in the table). The labels from
    both lists become the rows of an n x 2 integer matrix: column 0 holds the
    values from ev1, column 1 the values from ev2, and a label missing from
    one list leaves a zero in that column.

    Args:
        ev1: sequence of items indexable as item[0], item[1].
        ev2: sequence of items indexable as item[0], item[1].
        cutoff: forwarded to fexact() and included in the cache key.

    Returns:
        Whatever fexact(matrix, cutoff) returned for this table; the value is
        memoised in SIG_CACHE keyed on the flattened matrix and cutoff.
    """
    numpy = grace.get_numpy()

    # Put the pair in a canonical order so that (a, b) and (b, a) build the
    # same matrix and therefore share a single SIG_CACHE entry.
    if ev2 < ev1:
        ev1, ev2 = ev2, ev1

    # Assign each distinct label one row index in first-seen order.
    # setdefault keeps an index that already exists, so a label repeated
    # inside ev1 is not re-pointed at an index that is out of range or that a
    # later label would also receive. (If a label repeats, the last value
    # written below wins.)
    options = {}
    for item in ev1:
        options.setdefault(item[0], len(options))
    for item in ev2:
        options.setdefault(item[0], len(options))

    matrix = numpy.zeros((len(options), 2), int)
    for item in ev1:
        matrix[options[item[0]], 0] = item[1]
    for item in ev2:
        matrix[options[item[0]], 1] = item[1]

    # Plain ints keep the key hashable and independent of the numpy scalar type.
    key = (tuple(int(i) for i in matrix.ravel()), cutoff)
    if key not in SIG_CACHE:
        SIG_CACHE[key] = fexact(matrix, cutoff)
    return SIG_CACHE[key]
Exemplo n.º 2
0
def significance(ev1, ev2, cutoff):
    """Build an n x 2 table from two evidence lists and return its fexact() value.

    Items in ev1 and ev2 are accessed as item[0] (option label, used as a
    dict key) and item[1] (value written into the table). Column 0 of the
    table comes from ev1 and column 1 from ev2; a label that appears in only
    one list keeps a zero in the other column.

    Args:
        ev1: sequence of items indexable as item[0], item[1].
        ev2: sequence of items indexable as item[0], item[1].
        cutoff: passed to fexact() unchanged and made part of the cache key.

    Returns:
        The result of fexact(matrix, cutoff), served from SIG_CACHE when the
        same flattened table and cutoff have been seen before.
    """
    numpy = grace.get_numpy()

    # Avoid duplicates in SIG_CACHE: order the two lists canonically so a
    # swapped call produces the identical matrix and cache key.
    if ev2 < ev1:
        ev1, ev2 = ev2, ev1

    # One row index per distinct label, numbered in first-seen order.
    # setdefault never overwrites an existing index, which prevents a label
    # repeated within ev1 from being moved to an index that is out of bounds
    # or shared with a later label.
    options = {}
    for evidence in (ev1, ev2):
        for item in evidence:
            options.setdefault(item[0], len(options))

    matrix = numpy.zeros((len(options), 2), int)
    for column, evidence in enumerate((ev1, ev2)):
        for item in evidence:
            # A repeated label simply overwrites its cell; the last value wins.
            matrix[options[item[0]], column] = item[1]

    # Convert to plain ints so the key is hashable regardless of numpy dtype.
    key = (tuple(int(i) for i in matrix.ravel()), cutoff)
    if key not in SIG_CACHE:
        SIG_CACHE[key] = fexact(matrix, cutoff)
    return SIG_CACHE[key]
Exemplo n.º 3
0
def fexact(matrix, significance_cutoff):
    """Accumulate a significance value over tables sharing matrix's margins.

    The function walks candidate tables one cell at a time (all rows of
    column 0, then column 1, ...), keeping each cell within what is still
    available in its column and row. For every completed table whose summed
    log_fac() of cells is at least the input table's sum times
    SUM_ERROR_MARGIN, exp(const_part - that sum) is added to the running
    total.

    Args:
        matrix: 2-D array-like of counts; converted with numpy.array().
        significance_cutoff: enumeration stops as soon as the running total
            exceeds this value.

    Returns:
        The accumulated total, or None if it exceeded significance_cutoff.
    """
    numpy = grace.get_numpy()

    table = numpy.array(matrix)
    n_row, n_col = table.shape
    row_sum = numpy.sum(table, 1)
    col_sum = numpy.sum(table, 0)
    grand_total = numpy.sum(row_sum)

    # Threshold derived from the input table itself.
    observed = sum([log_fac(cell) for cell in table.ravel()])
    cutoff = observed * SUM_ERROR_MARGIN

    # Term that depends only on the margins, so it is computed once.
    row_part = sum([log_fac(value) for value in row_sum])
    col_part = sum([log_fac(value) for value in col_sum])
    const_part = row_part + col_part - log_fac(grand_total)

    # One-element list so the nested function can update the total in place.
    accumulated = [0.0]

    # How much of each row's total is still unassigned; mutated during the
    # walk and restored on the way back out of each level.
    remaining = row_sum.copy()

    def generate(row, col, col_remainder, total):
        saved = remaining[row]
        # The cell must leave no more for the rows below than they can take,
        # and cannot exceed what is left in this column or this row.
        lowest = max(0, col_remainder - numpy.sum(remaining[row + 1:]))
        highest = min(col_remainder, saved)

        at_last_row = row + 1 >= n_row
        at_last_col = col + 1 >= n_col

        for count in xrange(lowest, highest + 1):
            subtotal = total + log_fac(count)
            remaining[row] = saved - count
            if not at_last_row:
                generate(row + 1, col, col_remainder - count, subtotal)
            elif not at_last_col:
                # Column finished: start the next one from the top row.
                generate(0, col + 1, col_sum[col + 1], subtotal)
            elif subtotal >= cutoff:
                accumulated[0] += numpy.exp(const_part - subtotal)
                if accumulated[0] > significance_cutoff:
                    raise Cutoff_exceeded
        remaining[row] = saved

    try:
        generate(0, 0, col_sum[0], 0.0)
    except Cutoff_exceeded:
        return None

    return accumulated[0]
Exemplo n.º 4
0
def fexact(matrix, significance_cutoff):
    """Sum exp(const_part - s) over enumerated tables, with an early exit.

    Tables are enumerated cell by cell, column after column, with every cell
    bounded by the amount still unassigned in its column and in its row.
    Here s is the sum of log_fac() over a completed table's cells; a table
    contributes only when s is at least the input table's own sum multiplied
    by SUM_ERROR_MARGIN.

    Args:
        matrix: 2-D array-like of counts; copied via numpy.array().
        significance_cutoff: once the running sum exceeds this value the
            enumeration is abandoned.

    Returns:
        The running sum, or None when it went above significance_cutoff.
    """
    numpy = grace.get_numpy()

    counts = numpy.array(matrix)
    n_row, n_col = counts.shape
    row_sum = numpy.sum(counts, 1)
    col_sum = numpy.sum(counts, 0)
    n = numpy.sum(row_sum)

    # Acceptance threshold taken from the input table.
    cutoff = SUM_ERROR_MARGIN * sum([log_fac(entry) for entry in counts.ravel()])

    # Margin-only term shared by every enumerated table.
    margins_rows = sum([log_fac(entry) for entry in row_sum])
    margins_cols = sum([log_fac(entry) for entry in col_sum])
    const_part = margins_rows + margins_cols - log_fac(n)

    # Mutable cell holding the running sum for the nested function.
    result = [0.0]

    # Unassigned part of each row total; updated while descending and put
    # back before returning from each level.
    row_left = row_sum.copy()

    def generate(row, col, col_remainder, total):
        before = row_left[row]
        smallest = max(0, col_remainder - numpy.sum(row_left[row + 1:]))
        largest = min(col_remainder, before)

        rows_done = row + 1 >= n_row
        cols_done = col + 1 >= n_col

        for value in xrange(smallest, largest + 1):
            so_far = total + log_fac(value)
            row_left[row] = before - value

            if not rows_done:
                # Continue down the current column.
                generate(row + 1, col, col_remainder - value, so_far)
            elif not cols_done:
                # Move to the top of the next column.
                generate(0, col + 1, col_sum[col + 1], so_far)
            elif so_far >= cutoff:
                # Table complete and accepted.
                result[0] += numpy.exp(const_part - so_far)
                if result[0] > significance_cutoff:
                    raise Cutoff_exceeded

        row_left[row] = before

    try:
        generate(0, 0, col_sum[0], 0.0)
    except Cutoff_exceeded:
        return None

    return result[0]