def estVar(self, num, epsilon):
    """Return variance-component estimates, perturbed when ``epsilon >= 0``.

    Args:
        num: number of partitions forwarded to ``self.divideData``; also the
            divisor used when averaging the per-partition estimates.
        epsilon: total budget. A negative value returns the first entry of
            ``self.divideData(...)`` untouched.

    Returns:
        ``varEsts[0]`` when ``epsilon < 0``; otherwise ``[sg2, se2]`` where
        ``sg2`` averages element 0 and ``se2`` averages element 1 of every
        estimate, each with a ``Lap`` draw added and then clamped.

    NOTE(review): ``Pheno`` and ``Lap`` are defined outside this block;
    ``Lap(loc, scale)`` presumably draws Laplace noise -- confirm.
    """
    bed_path = self.BED.filename
    # Column 3 of the phenotype table read from the .fam file.
    pheno = Pheno(bed_path + ".fam").read().val[:, 3]
    estimates = self.divideData(bed_path, num=num)
    if epsilon < 0:
        return estimates[0]

    # Budget split: 10% for the phenotype variance, 45% for each component.
    var_budget = .1 * epsilon
    component_budget = .45 * epsilon
    parts = float(num)

    vary = self.estVarY(pheno, var_budget)

    mean_se2 = sum(est[1] for est in estimates) / parts
    se2 = mean_se2 + Lap(0.0, vary / (component_budget * parts))
    # Clamp from below first, then from above, exactly in that order.
    se2 = 0 if se2 < 0 else se2
    se2 = vary if se2 > vary else se2

    mean_sg2 = sum(est[0] for est in estimates) / parts
    sg2 = mean_sg2 + Lap(0.0, vary / (component_budget * parts))
    # A negative sg2 is replaced by 1% of the variance rather than by zero.
    sg2 = .01 * vary if sg2 < 0 else sg2
    sg2 = vary if sg2 > vary else sg2

    return [sg2, se2]
def WaldTest(y, MU, epsilon, snps, forFigs=False, coeff=1.0):
    """Compute (optionally perturbed) Wald-style statistics for selected SNPs.

    Args:
        y: phenotype vector passed to ``MU.prod`` and ``MU.normY``.
        MU: object providing ``prod``, ``normY``, ``maxMU``, ``snp_index``
            and the matrix attribute ``MU``.
        epsilon: total budget; values ``<= 0`` disable every ``Lap`` draw.
        snps: SNP names to score. An empty sequence selects every row of
            ``MU.MU`` and leaves the budget undivided.
        forFigs: when true the budget is not divided across SNPs, each SNP
            gets its own perturbed denominator, and ``coeff`` is ignored.
        coeff: multiplier applied to every statistic when ``forFigs`` is
            false.

    Returns:
        A list with one statistic per selected SNP.

    NOTE(review): the source line had lost its indentation; the inner
    ``if forFigs`` is assumed to sit inside the ``sen > 0 and epsilon > 0``
    branch -- confirm against the original file.
    NOTE(review): ``Lap(loc, scale)`` is defined elsewhere; presumably a
    Laplace sampler.
    """
    if len(snps) == 0:
        I = list(range(np.shape(MU.MU)[0]))
        eps = epsilon
    else:
        I = MU.snp_index(snps)
        eps = epsilon / float(len(snps))
    if forFigs:
        eps = epsilon

    sc = MU.prod(y)
    nm, sen = MU.normY(y)
    mxMU = MU.maxMU()

    bot = nm ** 2
    if sen > 0 and epsilon > 0:
        # Half of the per-SNP budget goes to the denominator.
        eps = .5 * eps
        bot = (nm + Lap(0.0, 2.0 * sen / epsilon)) ** 2
        if forFigs:
            eps = epsilon / 2.0
            bot = [(nm + Lap(0, sen / eps)) ** 2 for _ in I]

    if eps > 0:
        sc = [sc[i] + Lap(0.0, mxMU[i] / eps) for i in I]
    else:
        sc = [sc[i] for i in I]

    if forFigs:
        # Fix: without denominator noise ``bot`` is still a scalar, which the
        # per-SNP division below used to index; broadcast it instead.
        if not isinstance(bot, list):
            bot = [bot] * len(sc)
        return [s ** 2 / b for s, b in zip(sc, bot)]
    return [coeff * s ** 2 / bot for s in sc]
def pertData(r0, r1, r2, s0, s1, s2, epsilon=1.0):
    """Return a statistic computed from two perturbed weighted counts.

    ``x = 2*r0 + r1`` and ``y = 2*s0 + s1`` each receive one ``Lap`` draw of
    scale ``2/epsilon``; the statistic built from them is floored at
    ``2N / (2N - 1)`` where ``N`` is the total of all six counts.

    Args:
        r0, r1, r2: counts for the first group (presumably genotype counts
            for one cohort -- confirm with callers).
        s0, s1, s2: counts for the second group.
        epsilon: budget that sets the noise scale.

    Returns:
        The larger of the perturbed statistic and ``2N / (2N - 1)``.

    NOTE(review): ``Lap(loc, scale)`` is defined elsewhere; presumably a
    Laplace sampler.
    """
    R = r0 + r1 + r2
    S = s0 + s1 + s2
    N = R + S
    x = 2 * r0 + r1
    y = 2 * s0 + s1

    # Fix: ``2/epsilon`` truncates under Python 2 when epsilon is an int
    # (e.g. epsilon=3 gave scale 0); force true division.
    scale = 2.0 / epsilon
    xdp = x + Lap(0.0, scale)  # perturbed x
    ydp = y + Lap(0.0, scale)  # perturbed y

    numerator = 2 * N * (xdp * S - ydp * R) ** 2
    denominator = float(R * S * (ydp + xdp) * (2 * N - xdp - ydp))
    floor = 2 * N / float(2 * N - 1)
    return max(numerator / denominator, floor)
# Markov_N(a, b, eps): count overlapping occurrences of the patterns read from
# `b` inside the lines read from `a`, then try to add a `Lap` term to the count.
#
#   a, b -- objects exposing readline(); every line is wrapped in str() and
#           stripped before use.
#   eps  -- base value; `epsilons` holds eps*1 .. eps*10.
#
# What the statements do:
#   * `le` is the length of the first stripped line of `a`.
#   * Each pattern `c1` is placed, unescaped, inside the lookahead regex
#     "(?=" + c1 + ")", so findall() also counts overlapping matches; every
#     line of `a` gets a trailing '0' appended before matching.
#   * `sm` accumulates the match counts; `m` is assigned but never read.
#
# NOTE(review): the final assignment reads `d` and `N`, neither of which is
#   assigned in this function -- unless they are module globals this raises
#   NameError. It also divides a float by the list `epsilons` (TypeError) and
#   calls Lap with a single argument, while other call sites in this file pass
#   (loc, scale). Intended formula needs confirming before this can be fixed.
# NOTE(review): this definition is collapsed onto one physical line, so the
#   loop nesting cannot be recovered here; restore it from version control.
def Markov_N(a, b, eps): epsilons=[eps*i for i in range(1,11)]; line1=str(a.readline()); line2=str(b.readline()); le=len(line1.strip()); m=[0.0 for i in range(0,20)]; reps=20; for j in range(0,reps): line2=str(b.readline()); while line2: sm=0; c1=line2.strip(); line1=str(a.readline()); while line1: a1=line1.strip(); a1=a1+'0' idx=re.compile("(?=" + c1 + ")") idx=len(idx.findall(a1)) sm=sm+idx; line1=str(a.readline()); line1=line1.strip(); line2=str(b.readline()); line2=line2.strip(); d_N=sm; d_N=d+Lap(float(2*(le-N+1))/epsilons) return d_N
def PickTopNeigh(y, MU, mret, epsilon, reuse=False, snpList=""):
    """Pick ``mret`` SNPs via ``expPick`` on neighbour-distance scores.

    Args:
        y: phenotype vector passed to ``MU.prod`` and ``MU.neighDist``.
        MU: object providing ``prod``, ``neighDist``, ``snp_index``,
            ``snp_Names`` and the matrix attribute ``MU``.
        mret: number of SNPs to return.
        epsilon: total budget; 10% perturbs the boundary, 90% scales the
            scores handed to ``expPick``.
        reuse: forwarded unchanged to ``MU.neighDist``.
        snpList: optional path to a file with one SNP name per line; when
            given, only those SNPs are candidates.

    Returns:
        SNP names: entries of the file when ``snpList`` is given, otherwise
        ``MU.snp_Names(...)`` of the picked indices.

    NOTE(review): ``Lap`` and ``expPick`` are defined elsewhere; presumably a
    Laplace sampler and an exponential-mechanism picker -- confirm.
    """
    ep1 = .1 * epsilon
    ep2 = .9 * epsilon

    sc = [abs(s) for s in MU.prod(y)]
    # Boundary: mean of the mret-th and (mret+1)-th largest magnitudes.
    bnd = sum(sorted(sc, reverse=True)[mret - 1:mret + 1]) / 2.0
    bnd = abs(bnd + Lap(0, np.max(np.abs(MU.MU)) / ep1))

    print("Calculating Distance")
    neighDist = MU.neighDist(y, bnd, reuse=reuse)
    sc = [nei * ep2 / (2.0 * mret) for nei in neighDist]

    SNPS = []
    if len(snpList) > 0:
        # Fix: the context manager closes the file even if reading fails.
        with open(snpList) as fil:
            SNPS = [line.strip() for line in fil.readlines()]
        I = MU.snp_index(SNPS)
        sc = [sc[i] for i in I]

    index_Ret = expPick(sc, mret)
    if len(snpList) > 0:
        return [SNPS[i] for i in index_Ret]
    return MU.snp_Names(index_Ret)
def WaldTest(y, MU, epsilon, snps):
    """Compute perturbed Wald-style statistics for the given SNPs.

    Args:
        y: phenotype vector passed to ``MU.prod`` and ``MU.normY``.
        MU: object providing ``prod``, ``normY``, ``maxMU`` and ``snp_index``.
        epsilon: total budget, divided evenly across ``snps``.
        snps: non-empty sequence of SNP names (an empty one divides by zero).

    Returns:
        A list with one value ``s**2 / bot`` per requested SNP.

    NOTE(review): ``Lap(loc, scale)`` is defined elsewhere; presumably a
    Laplace sampler.
    """
    I = MU.snp_index(snps)
    eps = epsilon / float(len(snps))
    sc = MU.prod(y)
    # Fix: the call used to be ``MU.normY(self, y)``; ``self`` does not exist
    # in this plain function, so it raised NameError. The other callers in
    # this file use ``MU.normY(y)``.
    nm, sen = MU.normY(y)
    mxMU = MU.maxMU()

    bot = nm ** 2
    if sen > 0:
        # Half of the per-SNP budget goes to the denominator.
        eps = .5 * eps
        bot = (nm + Lap(0.0, 2.0 * sen / epsilon)) ** 2

    sc = [sc[i] + Lap(0.0, 2.0 * mxMU[i] / eps) for i in I]
    return [s ** 2 / bot for s in sc]
def CI(y, MU, p, epsilon, snps):
    """Return lower bounds, perturbed statistics and upper bounds per SNP.

    Args:
        y: phenotype vector passed to ``MU.prod`` and ``MU.normY``.
        MU: object providing ``prod``, ``normY``, ``maxMU`` and ``snp_index``.
        p: value whose square root is forwarded to ``interLap``.
        epsilon: total budget, divided evenly across ``snps`` and then halved.
        snps: non-empty sequence of SNP names.

    Returns:
        ``[CIdown, waldEst, CIup]``, three lists with one entry per SNP.

    NOTE(review): ``Lap`` and ``interLap`` are defined elsewhere; the result
    of ``interLap`` is indexed with 0 and 1, presumably (low, high) -- confirm.
    """
    I = MU.snp_index(snps)
    per_snp_eps = .5 * (epsilon / float(len(snps)))
    q = math.sqrt(p)

    raw_scores = MU.prod(y)
    norm_pair = MU.normY(y)
    nm = norm_pair[0]
    sen = norm_pair[1]
    mxMU = MU.maxMU()

    denom_scale = 2.0 * sen / epsilon
    botRoot = nm + Lap(0.0, denom_scale)
    bot = botRoot ** 2

    noisy_scores = [raw_scores[idx] + Lap(0.0, mxMU[idx] / per_snp_eps)
                    for idx in I]

    yInter = interLap(botRoot, 2.0 * sen / epsilon, q)
    scInter = [interLap(score, mxMU[idx] / per_snp_eps, q)
               for score, idx in zip(noisy_scores, I)]

    # Upper bound pairs the score's index-1 value with the denominator's
    # index-0 value; the lower bound uses the opposite pairing.
    CIup = [bounds[1] / yInter[0] for bounds in scInter]
    CIdown = [bounds[0] / yInter[1] for bounds in scInter]
    waldEst = [score ** 2 / bot for score in noisy_scores]
    return [CIdown, waldEst, CIup]
def DPE(self, y, c, epsilon, reuse=False):
    """Return the indices of SNPs whose ``chi2`` value falls below ``c``.

    Args:
        y: phenotype vector passed to ``self.normY``.
        c: threshold compared against ``chi2(statistic, n - K - 1)``.
        epsilon: budget; a negative value skips every ``Lap`` draw.
        reuse: accepted for interface compatibility; not used here.

    Returns:
        A set of positions into the NaN-filtered score list.

    NOTE(review): ``Lap``, ``chi2`` and ``K`` come from outside this block.
    ``K`` is used for the degrees of freedom while the statistic itself uses
    ``self.k`` -- confirm both are meant to be the same quantity.
    NOTE(review): NaN scores are dropped before indexing, so returned
    positions may not line up with rows of ``self.EIGN`` -- confirm.
    """
    bnd = c
    m = len(self.EIGN)  # number of SNPs
    n = len(self.EIGN[0])  # number of data owners
    [y1, nm, sen] = self.normY(y)
    print('nm=%s' % n)
    print('m=%s' % m)

    sc2 = np.dot(self.EIGN, y1)  # product of the matrix and the phenotype
    sc2 = [float(s) for s in sc2]
    sc2 = [s for s in sc2 if not math.isnan(s)]

    mxEIGN = self.maxEIGN()
    # Fix: this filter and the noise scale below used to read ``maxEIGN``,
    # a name that is never defined (NameError); the local is ``mxEIGN``.
    mxEIGN = [s for s in mxEIGN if not math.isnan(s)]

    if epsilon < 0:
        bot = nm ** 2
    else:
        # Genotypic scores receive Laplace noise.
        sc2 = [sc2[j] + Lap(0.0, 2.0 * mxEIGN[j] / epsilon)
               for j in range(len(sc2))]
        # Phenotypic norm receives Laplace noise. Fix: 2.0 avoids Python 2
        # integer division when epsilon is an int.
        bot = (nm + Lap(0.0, 2.0 / epsilon)) ** 2

    sc2 = [(n - self.k - 1) * (s ** 2) / bot for s in sc2]
    sc = np.abs(sc2)

    # Select the noisy significant SNPs whose P-value < bnd.
    I = set(i for i in range(len(sc)) if chi2(sc[i], n - K - 1) < bnd)
    length = len(I)
    print(length)
    return I
def PickTopNoise(y, MU, mret, epsilon):
    """Return the names of the ``mret`` SNPs with the largest noisy scores.

    Args:
        y: phenotype vector passed to ``MU.prod``.
        MU: object providing ``prod``, ``sens`` and ``snp_Names``.
        mret: number of SNPs to return.
        epsilon: budget; a negative value ranks the raw magnitudes.

    Returns:
        Whatever ``MU.snp_Names`` returns for the chosen index list.

    NOTE(review): ``Lap(loc, scale)`` is defined elsewhere; presumably a
    Laplace sampler.
    """
    sample_count = len(y)  # value unused; evaluated as in the original
    magnitudes = [abs(value) for value in MU.prod(y)]
    candidate_count = len(magnitudes)
    sens = MU.sens(mret)

    if epsilon < 0:
        ranked_on = magnitudes
    else:
        ranked_on = []
        for value in magnitudes:
            ranked_on.append(value + Lap(0, 2 * sens / epsilon))

    # Negated key gives descending order; ties keep their original order.
    descending = sorted(range(candidate_count), key=lambda pos: -ranked_on[pos])
    return MU.snp_Names(descending[:mret])
def PickSigLap(y, EIGN, mret, epsilon):
    """Return names of SNPs whose ``chi2`` value is below ``mret``.

    Args:
        y: phenotype vector passed to ``EIGN.prod``; its length feeds the
            second ``chi2`` argument.
        EIGN: object providing ``prod``, ``sens`` and ``snp_Names``.
        mret: passed to ``EIGN.sens`` and also used as the threshold.
        epsilon: budget; a negative value skips the ``Lap`` draws.

    Returns:
        Whatever ``EIGN.snp_Names`` returns for the set of selected indices.

    NOTE(review): ``Lap``, ``chi2`` and ``K`` are defined outside this block.
    """
    threshold = mret
    sample_count = len(y)
    scores = [abs(value) for value in EIGN.prod(y)]
    sens = EIGN.sens(mret)

    if not epsilon < 0:
        scores = [value + Lap(0, 2 * sens / epsilon) for value in scores]
    scores = np.abs(scores)

    # Select the noisy significant SNPs whose P-value < threshold.
    index_Ret = set(pos for pos in range(len(scores))
                    if chi2(scores[pos], sample_count - K - 1) < threshold)
    print('locations of noisy significant SNPs based on Laplace method: {}'.
          format(index_Ret))
    return EIGN.snp_Names(index_Ret)
def estNum(MU, y, pval, epsilon):
    """Estimate how many entries of ``MU.neighDist`` are positive.

    Args:
        MU: object providing ``normY``, ``neighDist`` and the attribute ``k``.
        y: phenotype vector.
        pval: its square root scales the boundary handed to ``neighDist``.
        epsilon: budget; a negative value returns the exact positive count.

    Returns:
        An int: the exact count of positive distances, or a value drawn via
        ``mult`` / ``rand.randint`` from weights built on those distances.

    NOTE(review): ``Lap``, ``mult`` and ``rand`` are defined elsewhere;
    ``mult(1, probs)`` presumably draws one multinomial sample -- confirm.
    """
    root_threshold = math.sqrt(pval)
    norm_info = MU.normY(y)
    nm = norm_info[0]
    sen = norm_info[1]
    sample_count = len(y)

    if sen > 0:
        bnd_est = root_threshold * abs(nm + Lap(0.0, 2.0 * sen / epsilon))
        if MU.k > 0:
            bnd_est = bnd_est / math.sqrt(sample_count - MU.k - 1)
    else:
        bnd_est = root_threshold * nm

    neigh = MU.neighDist(y, bnd_est)
    total = len(neigh)
    if epsilon < 0:
        return sum(1 for dist in neigh if dist > 0)

    # One weight per distance plus a final slot whose exponent is zero.
    weights = []
    for dist in neigh:
        if dist > 0:
            weights.append(math.exp(-epsilon * dist / 4.0))
        else:
            weights.append(math.exp(epsilon * (dist - 1) / 4.0))
    weights.append(math.exp(0.0))

    norm = sum(weights)
    probs = [weight / norm for weight in weights]
    multi = mult(1, probs)
    chosen = min(pos for pos in range(total + 1) if multi[pos] > 0)

    # The extra slot stands for "report the exact count".
    if chosen == total:
        return sum(1 for dist in neigh if dist > 0)

    val = neigh[chosen]
    above = sum(1 for dist in neigh if dist > val)
    ties = sum(1 for dist in neigh if dist == val)
    return rand.randint(above, above + ties - 1)
def PickSigNeigh(y, EIGN, mret, epsilon, reuse=False, snpList=""):
    """Select SNPs whose ``chi2`` value on neighbour scores is below a bound.

    Args:
        y: phenotype vector; its length ``n`` is used for slicing, scaling
            and the second ``chi2`` argument.
        EIGN: object providing ``prod``, ``neighDist``, ``snp_index``,
            ``snp_Names`` and the matrix attribute ``EIGN``.
        mret: accepted for interface compatibility; the bound is recomputed
            from the scores, so this value is not used.
        epsilon: total budget; 10% perturbs the bound, 90% scales the scores.
        reuse: forwarded unchanged to ``EIGN.neighDist``.
        snpList: optional path to a file with one SNP name per line; when
            given, only those SNPs are candidates.

    Returns:
        SNP names: entries of the file when ``snpList`` is given, otherwise
        ``EIGN.snp_Names(...)`` of the selected indices.

    NOTE(review): ``Lap``, ``chi2`` and ``K`` are defined outside this block.
    NOTE(review): the same ``bnd`` serves as the neighbour-distance boundary
    and as the ``chi2`` cut-off -- confirm that is intended.
    """
    n = len(y)
    ep1 = .1 * epsilon
    ep2 = .9 * epsilon

    sc = [abs(s) for s in EIGN.prod(y)]
    # Half the sum of the ranked magnitudes, skipping the largest one.
    bnd = sum(sorted(sc, reverse=True)[1:n]) / 2.0
    bnd = abs(bnd + Lap(0, np.max(np.abs(EIGN.EIGN)) / ep1))

    print("Calculating Distance")
    neighDist = EIGN.neighDist(y, bnd, reuse=reuse)
    sc = [nei * ep2 / (2.0 * n) for nei in neighDist]

    SNPS = []
    if len(snpList) > 0:
        # Fix: the context manager closes the file even if reading fails.
        with open(snpList) as fil:
            SNPS = [line.strip() for line in fil.readlines()]
        I = EIGN.snp_index(SNPS)
        sc = [sc[i] for i in I]

    # Select the noisy significant SNPs whose P-value < bnd.
    index_Ret = [i for i in range(len(sc)) if chi2(sc[i], n - K - 1) < bnd]
    print(
        'locations of noisy significant SNPs based on Neighbor distance method: {}'
        .format(index_Ret))
    if len(snpList) > 0:
        return [SNPS[i] for i in index_Ret]
    return EIGN.snp_Names(index_Ret)
def PickTopNeigh(y, MU, mret, epsilon):
    """Pick ``mret`` SNPs via ``expPick`` on neighbour-distance scores.

    Args:
        y: phenotype vector passed to ``MU.prod`` and ``MU.neighDist``.
        MU: object providing ``prod``, ``maxMU``, ``neighDist`` and
            ``snp_Names``.
        mret: number of SNPs to return.
        epsilon: total budget; 10% perturbs the boundary, 90% scales the
            scores handed to ``expPick``.

    Returns:
        Whatever ``MU.snp_Names`` returns for the picked indices.

    NOTE(review): ``Lap`` and ``expPick`` are defined elsewhere; presumably a
    Laplace sampler and an exponential-mechanism picker -- confirm.
    """
    sample_count = len(y)  # value unused; evaluated as in the original
    bound_budget = .1 * epsilon
    pick_budget = .9 * epsilon

    magnitudes = [abs(value) for value in MU.prod(y)]
    ranked = sorted(magnitudes, reverse=True)
    # Boundary: mean of the mret-th and (mret+1)-th largest magnitudes.
    bnd = sum(ranked[mret - 1:mret + 1]) / 2.0
    bnd = abs(bnd + Lap(0, max(MU.maxMU()) / bound_budget))

    print("Calculating Distance")
    distances = MU.neighDist(y, bnd)
    utilities = [dist * pick_budget / (2.0 * mret) for dist in distances]
    return MU.snp_Names(expPick(utilities, mret))
def estNum(MU, y, pval, epsilon):
    """Estimate how many entries of ``MU.neighDist`` are positive.

    Args:
        MU: object providing ``normY`` and ``neighDist``.
        y: phenotype vector.
        pval: p-value cut-off, converted with ``chi2.ppf(1 - pval, df=1)``
            and square-rooted to scale the boundary.
        epsilon: budget; a negative value returns the exact positive count.

    Returns:
        An int: the exact count of positive distances, or a value drawn via
        ``mult`` / ``rand.randint`` from weights built on those distances.

    NOTE(review): ``Lap``, ``mult`` and ``rand`` are defined elsewhere;
    ``mult(1, probs)`` presumably draws one multinomial sample -- confirm.
    """
    bnd_sc = math.sqrt(chi2.ppf((1.0 - pval), df=1))
    # Fix: the call used to be ``MU.normY(self, y)``; ``self`` does not exist
    # in this plain function, so it raised NameError.
    nm, sen = MU.normY(y)
    if sen > 0:
        bnd_est = bnd_sc * abs(nm + Lap(0.0, 2.0 * sen / epsilon))
    else:
        bnd_est = bnd_sc * nm

    neigh = MU.neighDist(y, bnd_est)
    m = len(neigh)
    if epsilon < 0:
        return len([d for d in neigh if d > 0])

    # One weight per distance plus a final slot whose exponent is zero.
    sc = [0.0] * (m + 1)
    for i in range(m):
        if neigh[i] > 0:
            sc[i] = -.5 * epsilon * neigh[i] / 2.0
        else:
            sc[i] = .5 * epsilon * (neigh[i] - 1) / 2.0
    sc = [math.exp(s) for s in sc]
    sm = sum(sc)
    sc = [s / sm for s in sc]

    multi = mult(1, sc)
    ret = min(i for i in range(m + 1) if multi[i] > 0)
    # The extra slot stands for "report the exact count".
    if ret == m:
        return len([d for d in neigh if d > 0])

    val = neigh[ret]
    v1 = len([d for d in neigh if d > val])
    v2 = len([d for d in neigh if d == val])
    return rand.randint(v1, v1 + v2 - 1)
def LapPick(mret, epsilon, scores, sens):
    """Return the indices of the ``mret`` largest perturbed scores.

    Args:
        mret: number of indices to return; also multiplies the noise scale.
        epsilon: budget dividing the noise scale.
        scores: sequence of numeric scores.
        sens: sensitivity factor in the noise scale.

    Returns:
        A list of positions into ``scores``, highest perturbed score first.

    NOTE(review): ``Lap(loc, scale)`` is defined elsewhere; presumably a
    Laplace sampler.
    """
    count = len(scores)
    noisy = []
    for score in scores:
        # One draw per score, scale sens * mret * 2 / epsilon.
        noisy.append(score + Lap(0.0, sens * mret * 2 / epsilon))

    # Negated key gives descending order; ties keep their original order.
    descending = sorted(range(count), key=lambda pos: -noisy[pos])
    return descending[:mret]
def estVarY(self, y, epsilon):
    """Return ``np.var(y)`` plus one ``Lap`` draw of scale ``3 / (epsilon * n)``.

    Args:
        y: sequence of values; ``n`` is its length.
        epsilon: budget dividing the noise scale.

    NOTE(review): ``Lap(loc, scale)`` is defined elsewhere; presumably a
    Laplace sampler.
    """
    variance = np.var(y)
    sample_count = len(y)
    noise_scale = 3 / float(epsilon * sample_count)
    return variance + Lap(0, noise_scale)