parser.add_option("-i", "--inp", dest="inp", help="*.sync or cmh output file")
parser.add_option("-s", "--snps", dest="snps", help="cmh output with all SNPs")
parser.add_option("-b", "--bins", dest="bins", help="number of bins")
parser.add_option("-m", "--maxdist", dest="maxdist", help="maximum distance to candidate.")
parser.add_option("-o", "--out", dest="out", help="outputfile for boxplot")
parser.add_option("--measure", dest="measure", help="What should be calculated, median (median), geometric mean (gm)")
parser.add_option_group(group)
(options, args) = parser.parse_args()

# Report missing options through the parser's usage message instead of
# failing later with an unrelated error (e.g. trying to open a file
# literally named "None", or int(None)).
for required in ("inp", "snps", "bins", "maxdist", "out"):
    if getattr(options, required) is None:
        parser.error("option --%s is required" % required)

# 1: Load the candidate SNPs.
# chrh is indexed by chromosome and then by position (see step 2);
# candl is the collection of candidates iterated in step 3.
chrh, candl = LDIO.read_candidatehash(options.inp, int(options.maxdist))

# 2: Iterate over all SNPs and hand each one to every candidate that is
# registered for its chromosome/position.
# NOTE(review): presumably read_candidatehash registers every position
# within maxdist of a candidate and returns empty entries otherwise;
# if it does not, this lookup raises KeyError -- confirm against LDIO.
with open(options.snps, "r") as snpfile:
    for l in snpfile:
        snp = PopIO.parse_cmhline(l)
        candidates = chrh[snp.chr][snp.pos]
        for cand in candidates:
            cand.appendSNP(snp)

# 3: Iterate over all candidates.
# Parse the bin count once, before the output file is created/truncated.
nbins = int(options.bins)
# NOTE(review): ofh is neither written to nor closed within the visible
# part of this script -- confirm that it is closed after the loop.
ofh = open(options.out, "w")
for cand in candl:
    bins = cand.distributeToBins(nbins)
    # Leading columns of the output record for this candidate.
    toprint = [cand.chr, cand.pos, cand.pvalue]
    for t in bins:
        value = 0
        # p-values of all SNPs that fell into this bin
        pvalar = [snp.pvalue for snp in t]
parser.add_option("-i", "--inp", dest="inp", help="input = cmh file")
parser.add_option("-p", "--pops", dest="pops", help="define replicates and populations: separate populations with a \",\", e.g.8,9,10 ")
parser.add_option("-t", "--th", dest="th", help="minimum allele count ")
parser.add_option("--test", action="store_true", dest="test", help="run the doctest")
parser.add_option_group(group)
(options, args) = parser.parse_args()

# Report missing options through the parser's usage message instead of
# crashing with an AttributeError/TypeError on None further down.
if options.inp is None or options.pops is None or options.th is None:
    parser.error("the options --inp, --pops and --th are required")


def is_fixed(a, pops, th):
    """Return True if no selected population has more than one allele.

    a    -- indexable collection of population objects; each must offer
            a count_alleles(th) method
    pops -- 1-based indices into `a` (ints or numeric strings)
    th   -- threshold handed unchanged to count_alleles()
            (presumably the minimum count for an allele to be counted,
            as suggested by the --th help text -- confirm in PopIO)

    An empty `pops` yields True. The result is a bool, so existing
    numeric checks such as `is_fixed(...) > 0` keep working.
    """
    return not any(a[int(i) - 1].count_alleles(th) > 1 for i in pops)


pops = options.pops.split(",")
# Parse the threshold once instead of once per input line.
th = int(options.th)

# The context manager closes the input file even if parsing fails.
with open(options.inp, "r") as cmhfile:
    for l in cmhfile:
        # filter empty lines
        if l.rstrip() == "":
            continue
        p = PopIO.parse_cmhline(l)
        if is_fixed(p.populations, pops, th):
            # Single-argument parenthesised form: identical output under
            # the Python 2 print statement, and valid as a function call.
            print(l.rstrip())
#2L 4910 A 6:0:0:0:0:0 - 5:0:0:0:0:0 25:0:0:0:0:0 33:0:0:0:0:0
#2L 4911 G 0:0:0:7:0:0 - 0:0:0:5:0:0 0:0:0:27:0:0 0:0:0:33:0:0
#2L 4912 A 7:0:0:0:0:0 - 5:0:0:0:0:0 27:0:0:0:0:0 32:0:0:0:0:0
#2L 4913 G 0:0:0:7:0:0 - 0:0:0:5:0:0 0:0:0:27:0:0 0:0:0:34:0:0
#2L 4914 A 7:0:0:0:0:0 - 5:0:0:0:0:0 26:0:0:0:0:0 33:0:0:0:0:0
#2L 4915 G 0:0:0:8:0:0 - 0:0:0:5:0:0 0:0:0:22:0:0 0:0:0:33:0:0
#2L 4916 A 8:0:0:0:0:0 - 6:0:0:0:0:0 25:0:0:0:0:0 33:0:0:0:0:0
#2L 4917 G 0:0:0:8:0:0 - 0:0:0:6:0:0 0:0:0:23:0:0 0:0:0:32:0:0
#2L 4918 C 0:0:8:0:0:0 - 0:0:6:0:0:0 0:0:22:0:0:0 0:0:30:0:0:0

# Header of the tab-separated output: chromosome, position, reference
# character, allele states, and a constant "+" column.
print("#chr\tpos\trc\tallele_states\t+")

for l in sync:
    # Skip lines that contain nothing but the line break.
    if l.rstrip("\n") == "":
        continue
    p = PopIO.parse_syncline(l)

    # Pool the A/C/G/T counts of all populations at this position.
    bh = {"A": 0, "C": 0, "G": 0, "T": 0}
    for pop in p.populations:
        bh["A"] += pop.A
        bh["T"] += pop.T
        bh["C"] += pop.C
        bh["G"] += pop.G

    # NOTE(review): maxkeyfornotrefc is defined elsewhere; judging by its
    # name it picks the most abundant allele other than the reference
    # character -- confirm against its definition.
    maxnonrefc = maxkeyfornotrefc(bh, p.refc)

    # Single-argument parenthesised form: identical output under the
    # Python 2 print statement, and valid as a function call.
    print(p.chr + "\t" + str(p.pos) + "\t" + p.refc + "\t" + maxnonrefc + "\t+")