# Fold every row of 'snplist' into three running probabilities via update()
# and print a summary of the per-site counters collected in `notes`.
#
# NOTE(review): the source text had its line breaks and indentation collapsed;
# the nesting below (summary printed once, after the loop) is reconstructed
# from the statement order -- confirm against the original file.
# `yaniv`, `no23me`, `update` and `binom` are defined outside this section.
heterocommon = 0

# One running probability per strategy, matching the labels printed at the end:
#   p[0] -> skips heterozygous sites
#   p[1] -> uses only heterozygous sites
#   p[2] -> uses every site
p = [0.2, 0.2, 0.2]

# One counter dict per strategy; update() receives notes[i] alongside p[i].
notes = [defaultdict(lambda: 0), defaultdict(lambda: 0), defaultdict(lambda: 0)]
errprob = 0.1  # not read in this section; update() is called with error_rate=.05

# open() in a `with` block replaces the Python-2-only file() builtin and
# guarantees the handle is closed even if update() raises.
with open('snplist') as snplist:
    for line in snplist:
        # Exactly seven tab-separated fields are required; anything else
        # raises ValueError here, as in the original.
        ch, pos, snp, val, prob, qual, f = line.split('\t')
        prob = float(prob)
        qual = float(qual)
        if prob == 0:
            continue
        if snp not in yaniv:
            no23me += 1
            continue

        genotype = yaniv[snp]
        # A single-character genotype or two identical alleles feeds the
        # "skip hetero" track; everything else feeds the "only hetero" track.
        # Track 2 is always updated.
        if len(genotype) == 1 or genotype[0] == genotype[1]:
            to_update = [0, 2]
        else:
            to_update = [1, 2]

        for i in to_update:
            p[i] = update(val, genotype, p[i], prob, notes[i], i == 0,
                          error_rate=.05)

# print(<single expression>) produces identical output under Python 2 and
# also parses under Python 3.
# NOTE(review): the label 'h**o misses' is reproduced verbatim; it looks like
# a masked word -- confirm the intended text before changing it.
print('homozyg hits=%d, heterozyg hits=%d, sex chromosome hits=%d, h**o misses=%d hetero misses=%d' % (
    notes[2]['homohit'], notes[2]['heterohit'], notes[2]['sexhits'],
    notes[2]['homomiss'], notes[2]['heteromiss']))

heterocommon = notes[2]['hetcom']
heterorare = notes[2]['hetrare']
# presumably scipy.stats.binom: CDF of the smaller count under a fair 50/50
# split of the heterozygous observations -- verify against the file's imports.
print('hetero common=%d, hetero rare=%d, p=%.4f' % (
    heterocommon, heterorare,
    binom.cdf(min(heterocommon, heterorare), heterocommon + heterorare, 0.5)))

print('Skipping hetero: p(Yaniv)=%.2f%%' % (100 * (p[0])))
print('Using only hetero: p(Yaniv)=%.2f%%' % (100 * (p[1])))
print('Using everything: p(Yaniv)=%.2f%%' % (100 * (p[2])))
# Scan the VCF, update one running probability per genome for every SNP we
# have a value for, then print the genomes ranked by that probability.
#
# NOTE(review): the source text had its line breaks and indentation collapsed;
# the nesting below is reconstructed from the statement order.
# `ourvals`, `ourps`, `ngenomes`, `ps`, `notes`, `update` and `binom` are
# defined outside this section.

# open() in a `with` block replaces the Python-2-only file() builtin and
# guarantees the handle is closed even when an exception propagates.
with open('/windows/ALL.chr6.ours.vcf') as vcf:
    for line in vcf:
        words = line.rstrip().split('\t')
        snp = words[2]
        if snp not in ourvals:
            continue

        # Allele table indexed by the digits in each genotype field:
        # words[3] first, then words[4] (split on ',' when longer than one
        # character).
        meaning = words[3:5]
        if len(meaning[1]) > 1:
            meaning = [meaning[0]] + meaning[1].split(',')

        for i in range(ngenomes):
            ref = words[i + 9]
            try:
                # Characters 0 and 2 of the field are read as allele indices
                # (the character between them is ignored).
                ref = meaning[int(ref[0])] + meaning[int(ref[2])]
            except IndexError:
                # Dump the offending field and allele table, then re-raise.
                # The original called the undefined name `throw`, which
                # replaced the real IndexError with a NameError.
                print(ref)
                print(meaning)
                raise
            ps[i] = update(ourvals[snp], ref, ps[i], ourps[snp], notes[i])

# sorted() is stable, so ties keep genome-index order exactly as the original
# in-place list.sort(reverse=True) did; it also works where zip() is lazy.
results = sorted(zip(ps, range(ngenomes)), key=lambda pair: pair[0],
                 reverse=True)

for score, p in results:
    hetcom = notes[p]['hetcom']
    hetrare = notes[p]['hetrare']
    # print(<single expression>) produces identical output under Python 2 and
    # also parses under Python 3.
    print('Patient %d has p=%f homhit=%d hommiss=%d het common=%d rare=%d p=%f' % (
        p, score, notes[p]['homohit'], notes[p]['homomiss'], hetcom, hetrare,
        binom.cdf(min(hetcom, hetrare), hetcom + hetrare, 0.5)))
    # The first genome that falls below 0.1 is still printed; stop after it.
    if score < .1:
        break
# NOTE(review): these two assignments look like the tail of a loop whose
# header precedes this section and is not visible here; they are reproduced
# unchanged -- re-indent them under that loop when merging.
ourvals[words[2]] = words[3]
ourps[words[2]] = float(words[4])

# Scan the VCF, update one running probability per genome for every SNP
# present in `ourvals`, then print the genomes ranked by that probability.
# `ngenomes`, `ps`, `notes`, `update` and `binom` are defined outside this
# section. Nesting is reconstructed from a whitespace-collapsed source.

# open() in a `with` block replaces the Python-2-only file() builtin and
# guarantees the handle is closed even when an exception propagates.
with open('/windows/ALL.chr6.ours.vcf') as vcf:
    for line in vcf:
        words = line.rstrip().split('\t')
        snp = words[2]
        if snp not in ourvals:
            continue

        # Allele table indexed by the digits in each genotype field:
        # words[3] first, then words[4] (split on ',' when longer than one
        # character).
        meaning = words[3:5]
        if len(meaning[1]) > 1:
            meaning = [meaning[0]] + meaning[1].split(',')

        for i in range(ngenomes):
            ref = words[i + 9]
            try:
                # Characters 0 and 2 of the field are read as allele indices.
                ref = meaning[int(ref[0])] + meaning[int(ref[2])]
            except IndexError:
                # Dump the offending field and allele table, then re-raise.
                # The original called the undefined name `throw`, which
                # replaced the real IndexError with a NameError.
                print(ref)
                print(meaning)
                raise
            ps[i] = update(ourvals[snp], ref, ps[i], ourps[snp], notes[i])

# sorted() is stable, so ties keep genome-index order exactly as the original
# in-place list.sort(reverse=True) did; it also works where zip() is lazy.
results = sorted(zip(ps, range(ngenomes)), key=lambda pair: pair[0],
                 reverse=True)

for score, p in results:
    hetcom = notes[p]['hetcom']
    hetrare = notes[p]['hetrare']
    # print(<single expression>) produces identical output under Python 2 and
    # also parses under Python 3.
    print('Patient %d has p=%f homhit=%d hommiss=%d het common=%d rare=%d p=%f' % (
        p, score, notes[p]['homohit'], notes[p]['homomiss'], hetcom, hetrare,
        binom.cdf(min(hetcom, hetrare), hetcom + hetrare, 0.5)))
    # The first genome that falls below 0.1 is still printed; stop after it.
    if score < .1:
        break