def __init__(self, filename):
    self.array = self.toArray(filename)
    self.numericArray, self.numericColumnIndicesMap, self.numericColumnIndexMap = self.toNumericArray()
    self.clusters = []
    self.enum = self.calcEnumStrs()
    # prj7: confidence interval for each species
    self.cidata = stats.ci(self)
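# stats.ci() above is project code that is not reproduced here.  A minimal
# sketch of the kind of helper the comment suggests -- a normal-approximation
# 95% interval per species -- where the helper name, the {species: values}
# input shape, and the formula are assumptions for illustration, not the
# original implementation:
import math

def per_species_ci_sketch(values_by_species, z=1.96):
    """Hypothetical helper: map each species to (lower, upper) bounds."""
    out = {}
    for species, values in values_by_species.items():
        n = len(values)
        m = sum(values) / float(n)
        var = sum((v - m) ** 2 for v in values) / float(n - 1) if n > 1 else 0.0
        half = z * math.sqrt(var / n)
        out[species] = (m - half, m + half)
    return out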
def summarizer(sp_tree, sp_quartet_tree, outfile):
    if outfile:
        v_out = open(outfile + ".verbose.tsv", "w")
        outf = open(outfile + ".tre", "w")
    # Data stored on species tree
    for i in sp_tree.iternodes():
        if i == sp_tree:
            continue
        # No children means a tip
        if len(i.children) == 0:
            holder = i.data["qln"]
            if outfile:
                # convert all floats to strings for printing
                temp = list(map(str, holder))
                v_out.write(i.label + "\t" + "\t".join(temp) + "\n")
        else:
            holder = sp_quartet_tree[i.data["q"]]
            if outfile:
                temp = list(map(str, holder))
                v_out.write(i.get_newick_repr(False) + "\t" + "\t".join(temp) + "\n")
        # Make sure something has actually been concordant and met the cutoff
        if len(holder) == 0:
            mean = 0.0
            median = 0.0
            min = 0.0
            max = 0.0
            CIL = 0.0
            CIH = 0.0
            i.data["concord"] = len(holder)
        else:
            mean = stats.mean(holder)
            median = stats.median(holder)
            min = stats.min(holder)
            max = stats.max(holder)
            i.data["concord"] = len(holder)
            # account for the fact you need two for CI's
            if len(holder) > 1:
                # array and z-value (95% is 1.96)
                CIL, CIH = stats.ci(holder, 1.96)
            else:
                CIL = 0.0
                CIH = 0.0
        i.data["mean"] = mean
        i.data["median"] = median
        i.data["min"] = min
        i.data["max"] = max
        i.data["cih"] = CIH
        i.data["cil"] = CIL
    array = ["mean", "median", "min", "max", "cil", "cih", "concord"]
    for i in array:
        if outfile:
            outf.write(sp_tree.get_newick_otherlen(i) + ";\n")
        else:
            print sp_tree.get_newick_otherlen(i) + ";"
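# stats.ci() in summarizer() comes from the project's own stats module, which
# is not shown here.  Judging from the call above it takes the list of values
# plus a z-value and hands back (lower, upper).  A minimal stand-in under that
# assumption, using the usual normal-approximation interval
# mean +/- z * sd / sqrt(n); the exact formula in the real module may differ:
import math

def ci_sketch(values, zval):
    """Hypothetical stand-in for stats.ci(values, zval); needs len(values) > 1."""
    n = len(values)
    m = sum(values) / float(n)
    sd = math.sqrt(sum((v - m) ** 2 for v in values) / float(n - 1))
    half = zval * sd / math.sqrt(n)
    return m - half, m + half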
    '1-sigma': erf(1 / sqrt(2)),
    '95%': 0.95,
}

# Logarithmically spaced sample sizes: 10..10^3 always, larger ranges optional
z = [int(ki) for ki in logspace(1, 3, low_points)]
if mid_points:
    z += [int(ki) for ki in logspace(3, 5, mid_points)]
if high_points:
    z += [int(ki) for ki in logspace(5, 7, high_points)]
z = list(sorted(set(z)))  # Make z unique

ni, nj = len(intervals), len(dists)
for pi, interval_name in enumerate(sorted(intervals.keys())):
    for pj, dist_name in enumerate(sorted(dists.keys())):
        print "processing", dist_name, interval_name
        s, rng = intervals[interval_name], dists[dist_name]
        #print [min(1000, work//ki + 1) for ki in z]
        # For each sample size ki, draw repeated samples and collect the
        # interval returned by ci() with and without the unbiased correction.
        unbiased = [array([ci(rng(ki), s, unbiased=True)
                           for _ in range(min(maxn, work // ki + 1))]) for ki in z]
        biased = [array([ci(rng(ki), s, unbiased=False)
                         for _ in range(min(maxn, work // ki + 1))]) for ki in z]
        pylab.subplot(ni, nj, pi * nj + pj + 1)
        #print data
        for data in biased, unbiased:
            # Interval width per replicate, then its mean and spread per sample size
            delta = [diff(di, axis=1)[:, 0] for di in data]
            w = [mean(di) for di in delta]
            dw = [std(di) for di in delta]
            #print z, w, dw
            #print interval_name, dist_name, ni, nj, pi, pj, pj*ni+pi
            pylab.semilogx(z, w, 'o')
            pylab.errorbar(z, w, dw)
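# The ci() being benchmarked above is defined elsewhere in this script.  For
# reference, a sketch that is call-compatible with the loops -- a sample
# array, a target central probability s, and an `unbiased` flag -- assuming
# ci() returns a (low, high) pair built from the sample mean and standard
# deviation, with `unbiased` switching between the n and n-1 variance
# normalisations.  This illustrates the calling convention only and is not
# the original implementation:
from numpy import asarray
from scipy.stats import norm

def ci_sketch(sample, s, unbiased=True):
    x = asarray(sample, dtype=float)
    k = norm.ppf(0.5 * (1.0 + s))          # z-score for central probability s
    sd = x.std(ddof=1 if unbiased else 0)  # `unbiased` toggles the normalisation
    return x.mean() - k * sd, x.mean() + k * sd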