def classify(self, n, r, p):
    """Return 1 if ``r`` is significantly large under a Gumbel null, else 0.

    The null distribution is a right-skewed Gumbel with scale
    ``B = 1 / ln(1/p)`` and location ``u``.  The upper-tail probability
    ``1 - CDF(r)`` is compared against a fixed 0.05 threshold.

    Args:
        n: Size used to locate the distribution; ``n == 0`` short-circuits
            to 0.  (Presumably the number of sites in a gene -- confirm
            against the caller.)
        r: Observed value to test (presumably the longest run length --
            confirm against the caller).
        p: Probability in (0, 1); ``1/p`` is used as the log base
            (presumably the non-insertion probability -- confirm).

    Returns:
        1 when the upper-tail p-value is below 0.05, otherwise 0.
    """
    if n == 0:
        return 0

    q = 1 - p
    B = 1 / math.log(1 / p)      # Gumbel scale (beta)
    u = math.log(n * q, 1 / p)   # asymptotic location: log base 1/p of n*q
    BetaGamma = B * tnseq_tools.getGamma()

    if n < EXACT:
        # Estimate more accurately based on expected run length, using the
        # exact calculation for small genes.  u is the mu of the Gumbel
        # (mean = mu + gamma*beta), obtained by matching of moments.
        exprun = self.ExpectedRuns_cached(n, p)
        u = exprun - BetaGamma

    # math.exp replaces scipy.exp: the NumPy aliases in the top-level scipy
    # namespace were deprecated in SciPy 1.4 and are gone from newer releases.
    pval = 1 - math.exp(scipy.stats.gumbel_r.logcdf(r, u, B))
    return 1 if pval < 0.05 else 0
def F_non(self, p, N, R):
    """Return a log-density score for ``p`` given the arrays ``N`` and ``R``.

    The score is the log of a Beta(ALPHA, BETA) density evaluated at ``p``
    plus the sum of Gumbel log-densities of ``R``, where each entry has its
    own location and all share the scale ``1 / ln(1/p)``.

    Args:
        p: Probability in (0, 1); the original note says to pass in
            ``P_nonins`` here.
        N: Numeric array (it is multiplied element-wise by ``1 - p``);
            presumably per-gene site counts -- confirm against the caller.
        R: Values scored under the Gumbel density, aligned with ``N``
            (presumably per-gene maximum run lengths -- confirm).

    Returns:
        The scalar sum of the log prior and the log-likelihood terms.
    """
    beta_gamma = tnseq_tools.getGamma() / math.log(1 / p)
    log_prior = numpy.log(scipy.stats.beta.pdf(p, ALPHA, BETA))

    # Asymptotic Gumbel location for every entry: log base 1/p of N*(1-p).
    location = numpy.log(N * (1.0 - p)) / numpy.log(1 / p)

    # Estimate more accurately based on expected run length, using the exact
    # calculation for small genes.
    for idx, n_sites in enumerate(N):
        if n_sites < EXACT:
            location[idx] = self.ExpectedRuns_cached(int(n_sites), p) - beta_gamma

    scale = 1 / math.log(1 / p)
    log_likelihood = numpy.sum(scipy.stats.gumbel_r.logpdf(R, location, scale))
    return log_prior + log_likelihood
def F_non(self, p, N, R):
    """Return a log-density score for ``p`` given the arrays ``N`` and ``R``.

    The score is the log of a Beta(ALPHA, BETA) density evaluated at ``p``
    plus the sum of Gumbel log-densities of ``R``, where each entry has its
    own location and all share the scale ``1 / ln(1/p)``.

    Args:
        p: Probability in (0, 1); the original note says to pass in
            ``P_nonins`` here.
        N: Numeric array (it is multiplied element-wise by ``1 - p``);
            presumably per-gene site counts -- confirm against the caller.
        R: Values scored under the Gumbel density, aligned with ``N``
            (presumably per-gene maximum run lengths -- confirm).

    Returns:
        The scalar sum of the log prior and the log-likelihood terms.
    """
    beta_gamma = tnseq_tools.getGamma() / math.log(1 / p)
    log_prior = numpy.log(scipy.stats.beta.pdf(p, ALPHA, BETA))

    # Asymptotic Gumbel location for every entry: log base 1/p of N*(1-p).
    location = numpy.log(N * (1.0 - p)) / numpy.log(1 / p)

    # Estimate more accurately based on expected run length, using the exact
    # calculation for small genes.
    for idx, n_sites in enumerate(N):
        if n_sites < EXACT:
            location[idx] = self.ExpectedRuns_cached(int(n_sites), p) - beta_gamma

    scale = 1 / math.log(1 / p)
    log_likelihood = numpy.sum(scipy.stats.gumbel_r.logpdf(R, location, scale))
    return log_prior + log_likelihood
def sample_Z(self, p, w1, N, R, S, T, mu_s, sigma_s, SIG):
    """Draw one 0/1 indicator per entry of ``N`` from a two-class mixture.

    For each entry the two unnormalized class weights are

    * ``h0 = Gumbel.pdf(R; mu, sigma) * Normal.pdf(S; mu_s * R, sigma_s) * (1 - w1)``
    * ``h1 = SIG * w1``

    and the indicator is a Bernoulli draw with success probability
    ``h1 / (h0 + h1)``.

    Args:
        p: Probability in (0, 1) that sets the Gumbel location and scale
            (presumably the non-insertion probability -- confirm).
        w1: Mixture weight applied to class 1; ``1 - w1`` weights class 0.
        N: Numeric array (multiplied element-wise by ``1 - p``); presumably
            per-gene site counts -- confirm against the caller.
        R: Values scored under the Gumbel density, aligned with ``N``.
        S: Values scored under the normal density with mean ``mu_s * R``.
        T: Not used by this method; kept so existing callers keep working.
        mu_s: Multiplier on ``R`` giving the normal mean.
        sigma_s: Standard deviation of the normal density.
        SIG: Class-1 likelihood term, aligned with ``N``.

    Returns:
        An array of ``len(N)`` samples from ``scipy.stats.binom.rvs(1, ...)``,
        i.e. zeros and ones.
    """
    G = len(N)
    q = 1.0 - p
    BetaGamma = tnseq_tools.getGamma() / math.log(1 / p)

    # Asymptotic Gumbel location for every entry: log base 1/p of N*q.
    mu = numpy.log(N * q) / numpy.log(1 / p)

    # Estimate more accurately based on expected run length, using the exact
    # calculation for small genes.
    for i, n_i in enumerate(N):
        if n_i < EXACT:
            mu[i] = self.ExpectedRuns_cached(int(n_i), p) - BetaGamma

    sigma = 1.0 / math.log(1.0 / p)

    # numpy.exp replaces scipy.exp: the NumPy aliases in the top-level scipy
    # namespace were deprecated in SciPy 1.4 and are gone from newer releases.
    gumbel_density = numpy.exp(scipy.stats.gumbel_r.logpdf(R, mu, sigma))
    normal_density = scipy.stats.norm.pdf(S, mu_s * R, sigma_s)

    # The 1e-10 offsets prevent division by zero; if neither class is
    # probable, p(z1) should be ~0.5.
    h0 = gumbel_density * normal_density * (1 - w1) + 1e-10
    h1 = SIG * w1 + 1e-10

    p_z1 = h1 / (h0 + h1)
    return scipy.stats.binom.rvs(1, p_z1, size=G)