def orforflistmatch(s_orf, orflist):
    """
    Which annotations associated with s_orf are significantly
    overrepresented in orflist?
    """
    if not _orfs_by_go: load_GO()
    norfs   = len(orflist)
    totorfs = len(_gos_by_orf.keys())
    categories = []
    for anno in annotations(s_orf):
        categories.append(anno.desc)
    sigs = []
    for category in categories:
        all_by_cat = annotation2orfs(category)
        nall       = len(all_by_cat)
        fracall    = float(nall) / float(totorfs)
        sub_by_cat = [x for x in orflist if x in all_by_cat]
        nsub       = len(sub_by_cat)
        if norfs == 0:
            fracsub = 0
        else:
            fracsub = float(nsub) / float(norfs)
        sig = Arith.hypgeomsummore(nall, totorfs, norfs, nsub)
        sigs.append((sig, category, fracall, fracsub, sub_by_cat))
        #if s_orf == SGD.gene2orf('NRG1'):
        #    print '@ NRG1 %5.2e %-40s %4d %4d %4d %4d ' % (
        #        sig, category, nall, totorfs, norfs, nsub)
    sigs.sort()
    #for sig in sigs: print sig
    return sigs
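# Hedged usage sketch (not part of the original module): scan the tuples
# returned by orforflistmatch() for enriched categories.  Assumes the GO
# tables are loadable via load_GO(); the ORF names are hypothetical.
def _example_orforflistmatch():
    cluster = ['YDR043C', 'YKL062W', 'YGL209W']      # hypothetical ORF list
    for sig, category, fracall, fracsub, members in \
            orforflistmatch('YDR043C', cluster):
        if sig < 0.05:                               # uncorrected p-value
            print '%8.2e  %-40s  %5.1f%% -> %5.1f%%' % (
                sig, category, fracall * 100, fracsub * 100)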
def tablemotifs(_motifs):
    file   = _motifs[0].file
    gifdir = 'logos_' + file + '.dir'
    motifs = [m.trimmed(0.25) for m in _motifs]
    cleanmotifs(motifs)
    top = getarg('top')
    if top: motifs = motifs[0:top]
    for m in motifs:
        if m.source:
            toks   = m.source.split()
            m.name = toks[0].split('_')[0]
            if (len(toks) > 2) and (toks[2] != '[]'):
                ids        = toks[2].split(',')
                m.matchids = ', '.join([x.split('-')[-1] for x in ids])
            else:
                m.matchids = ' '
        else:
            m.name     = ''
            m.matchids = ''
    for i in range(len(motifs)):
        j = i + 1
        motifs[i].i = j
        if getarg('GIF'):
            gifname = motifs[i].giflogo(id='%s.%s' % (file, i), title=' ',
                                        scale=0.6, info_str=' ')
            if (not os.path.exists(gifdir)): os.mkdir(gifdir)
            motifs[i].gifname = '%s/%s' % (gifdir, gifname)
            os.rename(gifname, motifs[i].gifname)
    txts = []
    for m in motifs:
        s = ''
        s = s + '<tr>'
        s = s + '<td>%s</td>' % (m.name)
        s = s + '<td>%d</td>' % (m.i)
        s = s + '<td>%s</td>' % (m.matchids)
        if getarg('GIF'):
            s = s + '<td><img src="%s"></td>' % (m.gifname)
        else:
            s = s + '<td>%s</td>' % (m.oneletter)
        #s = s + '<td>%5.2f</td>' % (m.i)
        try:
            s = s + '<td>%3d</td><td>%5.1f%%</td>' % (m.numprobes, m.frac * 100)
        except:
            s = s + '<td>n/a</td><td>n/a</td>'
        s = s + '<td>%5.2f</td>' % (Arith.nlog10(m.pvalue))
        try:
            s = s + '<td>%5.2f</td>' % (m.ROC_auc)
        except:
            s = s + '<td>%5.2f</td>' % (0.0)
        if getarg('MAP'):
            s = s + '<td>%6.2f</td>' % (m.MAP)
        if getarg('CONS'):
            s = s + '<td>%3.0f%% (%d)</td>' % (m.Cfrac * 100, m.numconsmatchbound)
            s = s + '<td>%d/%d -> %6.2f</td>' % (m.inC, m.inC + m.inU, m.CSimproved)
        if getarg('PROGRAMS'):
            s = s + '<td>%s</td>' % ("<br>".join(m.programs))
        s = s + "</tr>\n"
        txts.append(s)
    print ''.join(txts)
    sys.stdout.flush()
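# Minimal usage sketch (an assumption based on the HTML tablemotifs() emits:
# it prints only <tr> rows, so the caller supplies the surrounding <table>
# and a header matching the flags passed via getarg).  The header below is
# illustrative, for a run without GIF/MAP/CONS/PROGRAMS.
def _example_tablemotifs(motifs):
    print '<table border="1">'
    print ('<tr><th>Name</th><th>#</th><th>Matches</th><th>Motif</th>'
           '<th>Probes</th><th>%</th><th>-log10(p)</th><th>ROC AUC</th></tr>')
    tablemotifs(motifs)
    print '</table>'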
def probOvlpBinomial(A, B, thresh=0.7, verbose=None):
    if A.width >= B.width:
        Wide, Narrow = A, B
    else:
        Wide, Narrow = B, A
    RC = MotifTools.revcomplement
    newWide = Wide[-1, Wide.width + 1]
    if Wide.__dict__.has_key('bestWide'):
        bestWide = Wide.bestWide
    else:
        bestWideD = {}
        for x in newWide.bestseqs(thresh * newWide.maxscore):
            bestWideD[x[1]] = 1
        for x in bestWideD.keys():
            bestWideD[RC(x)] = 1
        Wide.bestWide = bestWideD.keys()
        bestWide = Wide.bestWide
    Wide = newWide
    D = {}
    for i in range(len(bestWide)):
        D[i] = bestWide[i]
    P = ProbeSet(genome=D)
    matchNarrow = P.count_matching_probes(Narrow, thresh=thresh)
    if matchNarrow == 0:
        p = 1.0
        return p
    if not Narrow.__dict__.has_key('probNarrow'):
        Narrow.probNarrow = {}
    if Narrow.probNarrow.has_key(Wide.width):
        probNarrow = Narrow.probNarrow[Wide.width]
    else:
        probNarrow = estimate_frequency(Narrow, Wide.width, thresh=thresh)
        Narrow.probNarrow[Wide.width] = probNarrow
    p = Arith.binomialsumtail(probNarrow, len(bestWide), matchNarrow)
    print '\nD= %7.3f %9.4e %8d %7d %-14s %-20s %-14s %-20s' % (
        Arith.nlog10(p), probNarrow, len(bestWide), matchNarrow,
        A.family, A, B.family, B)
    return p
def ace2tamo(filename, tamoname):
    global probefile, PROBESET
    if   re.search('\.ace$',  filename):
        mdobject = AlignAce.AlignAce(filename)
    elif re.search('\.meme$', filename):
        mdobject = Meme.Meme(filename)
    fsaname = find_fsa(mdobject.fastafile)
    fsaD    = MotifMetrics.fasta2seqs(fsaname, 'want_dict')
    probes  = fsaD.keys()
    if not probefile:
        PROBESET = MotifMetrics.ProbeSet('HUMAN_250')
        #PROBESET = pick_genome(fsaname)
    for key, seq in fsaD.items():
        PROBESET.probes[key] = seq
    # Fill in any statistics the parser did not already supply
    for motif in mdobject.motifs:
        if motif.pvalue  == 1:    motif.pvalue  = PROBESET.p_value(motif, probes, 'v')
        if motif.church  == 1:    motif.church  = PROBESET.church(motif, probes, 'v')
        if motif.E_site  == None: motif.E_site  = PROBESET.E_sitef(motif, probes, 3, 'v')
        #if motif.E_chi2 == None: motif.E_chi2  = PROBESET.E_chi2(motif, probes, None, 'v')
        if motif.E_seq   == None: motif.E_seq   = PROBESET.E_seq(motif, probes, 'v')
        if motif.ROC_auc == None: motif.ROC_auc = PROBESET.ROC_AUC(motif, probes, 'v')
        if motif.MNCP    == None: motif.MNCP    = PROBESET.MNCP(motif, probes, 'v')
        if re.search('\.meme$', filename):
            motif.MAP = -math.log(motif.evalue) / math.log(10)
        sys.stdout.flush()
    i = 0
    for motif in mdobject.motifs:
        motif.seednum = i
        i = i + 1
        kmers = motif.bogus_kmers(100)
        motif.maxscore = -100
        scores = [motif.scan(kmer)[2][0] for kmer in kmers]
        print Arith.avestd(scores)
    # Rank by the statistic appropriate to the source program
    if re.search('\.meme$', filename):
        mdobject.motifs.sort(lambda x, y: cmp(x.pvalue, y.pvalue))
    else:
        mdobject.motifs.sort(lambda x, y: cmp(x.church, y.church))
    MotifTools.save_motifs(mdobject.motifs, tamoname)
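# Hedged usage sketch: converting motif-discovery output to a TAMO motif
# file.  ace2tamo() picks the parser from the extension; the filenames here
# are hypothetical placeholders.
#
#   ace2tamo('run1.ace',  'run1.tamo')   # AlignAce results
#   ace2tamo('run1.meme', 'run1.tamo')   # MEME results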
def orflist2categories_long(orflist, thresh=0.05):
    """
    Which categories are overrepresented among orflist?
    Thresh is applied after Bonferroni correction.

    Returns: Sorted list of tuples,
             [(significance, category, fracall, fracsub, orflist), ...]
    """
    if not _orfs_by_go: load_GO()
    norfs   = len(orflist)
    totorfs = len(_gos_by_orf.keys())
    #Determine which categories are described by the set
    categories = []
    for orf in orflist:
        for anno in annotations(orf):
            if anno.desc not in categories:
                categories.append(anno.desc)
    totcats = float(len(categories))
    sigs = []
    for category in categories:
        all_by_cat = annotation2orfs(category)
        nall       = len(all_by_cat)
        fracall    = float(nall) / float(totorfs)
        sub_by_cat = [x for x in orflist if x in all_by_cat]
        nsub       = len(sub_by_cat)
        fracsub    = float(nsub) / float(norfs)
        sig = Arith.hypgeomsummore(nall, totorfs, norfs, nsub) * totcats
        #print category, sig, nall, totorfs, norfs, nsub, ' ', totcats
        sigs.append((sig, category, fracall, fracsub, sub_by_cat))
    sigs.sort()
    ans = []
    for sigdata in sigs:
        sig, category, fracall, fracsub, orfs = sigdata
        if sig > thresh: continue
        ans.append(sigdata)
    return ans
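# Hedged usage sketch: Bonferroni-corrected GO enrichment for a gene list.
# The ORF names are hypothetical placeholders.
def _example_orflist2categories_long():
    cluster = ['YAL038W', 'YGR192C', 'YJL052W']      # hypothetical ORF list
    hits = orflist2categories_long(cluster, thresh=0.01)
    for sig, category, fracall, fracsub, members in hits:
        print '%8.2e  %-40s  %d ORFs' % (sig, category, len(members))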
def NLBPO(A, B, thresh):
    p = probOvlpBinomial(A, B, thresh)
    return Arith.nlog10(p)
def NLPO(A, B, thresh):
    p = probOvlp(A, B, thresh)
    return Arith.nlog10(p)
def probOvlp(A, B, thresh=0.7, verbose=None):
    if A.width >= B.width:
        Wide, Narrow = A, B
    else:
        Wide, Narrow = B, A
    RC = MotifTools.revcomplement
    newWide = Wide[-1, Wide.width + 1]
    if Wide.__dict__.has_key('bestWide'):
        bestWide = Wide.bestWide
    else:
        bestWideD = {}
        for x in newWide.bestseqs(thresh * newWide.maxscore):
            bestWideD[x] = 1
        for x in bestWideD.keys():
            bestWideD[RC(x)] = 1
        Wide.bestWide = bestWideD.keys()
        bestWide = Wide.bestWide
    Wide = newWide
    if Narrow.__dict__.has_key('bestNarrow'):
        bestNarrow = Narrow.bestNarrow
    else:
        bestNarrowD = {}
        for x in Narrow.bestseqs(thresh * Narrow.maxscore):
            bestNarrowD[x] = 1
        for x in bestNarrowD.keys():
            bestNarrowD[RC(x)] = 1
        bestNarrow = bestNarrowD.keys()
        Narrow.bestNarrow = bestNarrow
    #bestWide   = [x[1] for x in Wide.bestseqs  (thresh*Wide.maxscore)  ]
    #bestNarrow = [x[1] for x in Narrow.bestseqs(thresh*Narrow.maxscore)]
    countNarrow = len(bestNarrow)
    countWide   = len(bestWide)
    numtotal    = math.pow(4, Wide.width)
    fudgefactor = math.pow(4, Wide.width - Narrow.width)
    bestWideTups = [(x, MotifTools.revcomplement(x)) for x in bestWide]
    countBoth = 0
    for i in range(len(bestNarrow)):
        m_narrow = bestNarrow[i]
        delj = []
        for j in range(len(bestWideTups)):
            if (bestWideTups[j][0].find(m_narrow) >= 0) or \
               (bestWideTups[j][1].find(m_narrow) >= 0):
                countBoth += 1
                delj.append(j)
        delj.reverse()   #Chew in from the back
        for j in delj:
            del bestWideTups[j]
    if verbose:
        print '%10d %10d %10d %10d | %10d %5d ' % (
            countWide, numtotal, countNarrow * fudgefactor, countBoth,
            countNarrow, Wide.width - Narrow.width),
    p = Arith.hypgeomsummore(countWide,                  #Num Interesting
                             numtotal,                   #All k-mers
                             countNarrow * fudgefactor,  #Number picked
                             countBoth)                  #Number found
    return p
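# Illustrative note (an inference from the code above, not original
# documentation): probOvlp() frames motif overlap as a hypergeometric draw.
# Among all 4^w k-mers at the wide motif's width w, the wide motif's best
# matches are the "interesting" set; the narrow motif's best matches,
# scaled up by 4^(w_wide - w_narrow), are the "drawn" set; and countBoth is
# how many drawn k-mers turned out to be interesting.  A and B are assumed
# to be MotifTools.Motif instances with .width set.
#
#   p = probOvlp(A, B, thresh=0.7, verbose=1)
#   print 'similarity as -log10(p): %6.2f' % Arith.nlog10(p)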
def conservation_pvalue(nmer, IDs, fsaDict, ConsDict, num_alignments):
    width = len(nmer)
    total       = []
    unconserved = []
    thetas      = []
    # Estimate the background rate of unconserved positions per alignment
    for i in range(num_alignments - 1):
        total.append(0)
        unconserved.append(0)
        tot_positions   = 0
        tot_unconserved = 0
        for ID in IDs:
            try:    cons = ConsDict[ID]
            except: continue
            if (cons[i] == []): continue
            try:
                tot_positions   = tot_positions   + len(cons[i])
                tot_unconserved = tot_unconserved + cons[i].count(1)
            except:
                print "cons: %s" % cons
        try:    thetas.append(float(tot_unconserved) / tot_positions)
        except: thetas.append(1.0)
    #print thetas
    # Tally conservation at every match to nmer (motif or ambiguity string)
    for ID in IDs:
        seq   = fsaDict[ID]
        seqrc = ConvergeMotifTools.revcomplement(seq)
        try:    cons = ConsDict[ID]
        except: continue
        if (cons == []): continue
        hits  = []
        hitsr = []
        if (type(nmer) == type(ConvergeMotifTools.Motif())):
            hits = cluster.matches_old(nmer, seq, 0.7)
            #print len(hits)
        else:
            site_re = re.compile(AmbigToRegExp(nmer))
            hit  = site_re.search(seq)
            hitr = site_re.search(seqrc)
            while (hit != None):
                hits.append(hit.start())
                hit = site_re.search(seq, hit.end())
            while (hitr != None):
                if (hits.count(len(seq) - hitr.end() - 1) == 0):
                    hits.append(len(seq) - hitr.end() - 1)
                hitr = site_re.search(seqrc, hitr.end())
        for hit in hits:
            for i in range(width):
                for j in range(num_alignments - 1):
                    if (cons[j] != []):
                        total[j]       = total[j] + 1
                        unconserved[j] = unconserved[j] + cons[j][hit + i]
    # Normal approximation: compare the observed unconserved count at motif
    # sites to what the background rates predict
    mean = 0
    var  = 0
    uc   = 0
    for i in range(num_alignments - 1):
        m = total[i] * thetas[i]
        #nq = total[i]*(1-thetas[i])
        #if (min(nq,m)<5): return(0.5)
        mean = mean + m
        var  = var + total[i] * thetas[i] * (1 - thetas[i])
        uc   = uc + unconserved[i]
    stdev = math.sqrt(var)
    if (stdev > 0):
        Z = ((uc + 0.5) - mean) / stdev   # 0.5 = continuity correction
    else:
        Z = 0
    if (Z >= 0): p = Arith.lzprob(Z)
    else:        p = 1.0 - Arith.lzprob(-Z)
    return p
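# Sketch of the input shapes conservation_pvalue() appears to expect (an
# inference from the code above, not original documentation; the IDs and
# sequences are hypothetical):
#
#   fsaDict  = {'YBR020W': 'ACGTACGT...'}        # ID -> promoter sequence
#   ConsDict = {'YBR020W': [[0,1,0, ...], ...]}  # ID -> one 0/1 list per
#                                                #  alignment; 1 = position
#                                                #  unconserved
#   p = conservation_pvalue('TGACTCA', fsaDict.keys(),
#                           fsaDict, ConsDict, num_alignments=4)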
def WMWtest(A, B):
    """
    WMWtest(A,B) -- Computes the Wilcoxon-Mann-Whitney nonparametric
    W statistic for two distributions

    input:  list of numbers, list of numbers
    output: p-value, W-statistic
    """
    A.sort()
    B.sort()
    TotalList = A + B
    TotalList.sort()
    nA = len(A)
    nB = len(B)
    N  = nA + nB
    MaxSum = N * (N + 1) / 2.0
    H0     = MaxSum / 2.0
    ## Replace values by ranks (ties get the mean of their ranks)
    previous = []
    start = 0
    Total_rank = TotalList[:]
    for i in range(len(TotalList)):
        if (TotalList[i] == previous):
            mean_rank = (start + i + 2) / 2.0
            for j in range(start, i + 1):
                Total_rank[j] = mean_rank
        else:
            Total_rank[i] = i + 1
            previous = TotalList[i]
            start = i
    ## Determine the shortest list
    if nA < nB: shortest = A
    else:       shortest = B
    nShortest = len(shortest)
    ## Sum the ranks in the shortest list
    W = 0
    for Value in shortest:
        i = 0
        while (i < len(TotalList) and Value != TotalList[i]):
            i += 1
        W += Total_rank[i]
    ## Use the smallest value of W
    if (W > H0): W = MaxSum - W
    ## Determine the two-tailed level of significance
    p = 0
    ## First calculate the Normal approximation.  This can be used to
    ## check whether a significant result is plausible for larger N.
    Permutations = k_out_n(nA, N)
    if (Permutations >= 25000) or (nShortest > 10):
        if W >= H0: Continuity = -0.5
        else:       Continuity =  0.5
        Z = (W + Continuity - nShortest * (N + 1.0) / 2.0) / \
            sqrt(nA * nB * (N + 1) / 12.0)
        Z = fabs(Z)
        p = 2 * (1 - Arith.lzprob(Z))
    ## The exact level of significance.  For large N, first check whether a
    ## significant result is plausible, i.e., the Normal approximation gives
    ## a p < 0.25.
    if (nShortest + 1 < 10) and (p < 0.25) and (Permutations < 60000):
        # Remember that W must be SMALLER than MaxSum/2 = H0
        Less = CountSmallerRanks(W, 0, len(shortest) - 1, 0, Total_rank)
        # If Less < Permutations/2, we have obviously calculated the
        # wrong way.  We should have calculated UPWARD (higher than W).
        # We can't do that, but we can calculate Less for W-1 and
        # subtract it from Permutations.
        if (2 * Less > Permutations):
            Less = CountSmallerRanks(W - 1, 0, len(shortest) - 1, 0, Total_rank)
            Less = Permutations - Less
        SumFrequencies = Permutations
        p = 2.0 * Less / SumFrequencies
    return p, W
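# Hedged usage sketch for WMWtest().  Note it sorts its arguments in place,
# so pass copies if the original ordering matters.
def _example_WMWtest():
    A = [1.2, 2.2, 3.4, 4.1, 5.0]
    B = [2.0, 5.9, 6.5, 7.1, 8.3]
    p, W = WMWtest(A[:], B[:])
    print 'W = %.1f   two-tailed p = %.4f' % (W, p)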