def run(args):
    """Print a histogram report for one numeric column of a tab-delimited file.

    Lines whose first character is ``#`` are skipped. The remaining lines are
    split on tabs and the column selected by ``get_col_num(args.c)`` is parsed
    as floats.

    The report, written to stdout, consists of:
      * a ``#``-prefixed line holding the result of ``chart(...)`` for the
        density-normalised histogram,
      * a ``#``-prefixed line of summary statistics,
      * a chi-square uniformity p-value on the bin counts (only when scipy
        can be imported),
      * a tab-separated table ``bin_start, bin_end, n``.

    Args:
        args: object exposing ``c`` (column spec passed to ``get_col_num``),
            ``file`` (path of the input file) and ``n`` (number of bins; a
            falsy value leaves numpy's default binning in place).
    """
    col_num = get_col_num(args.c)

    # Read inside a context manager so the handle is closed deterministically;
    # the list comprehension drains the generator before the file closes.
    with open(args.file) as handle:
        rows = (line.rstrip("\r\n").split("\t")
                for line in handle if line[0] != "#")
        pvals = np.array([float(row[col_num]) for row in rows])

    kwargs = {"bins": args.n} if args.n else {}

    # `density=True` replaces the removed `normed=True` keyword; for the
    # equal-width bins used here the two give the same values.
    density, bins = np.histogram(pvals, density=True, **kwargs)
    xlabels = "|".join("%.2f-%.2f" % edges for edges in pairwise(bins))
    # Single-argument print calls behave the same on Python 2 and 3.
    print("# %s" % (chart(density, xlabels),))

    # Raw counts are needed for the chi-square test and for the table.
    counts, bins = np.histogram(pvals, **kwargs)
    print("# median: %.3f mean:%.3f; std: %.3f min:%.3f; max:%.3f" % (
        np.median(pvals), pvals.mean(), pvals.std(), pvals.min(),
        pvals.max()))

    # scipy is optional: only the import is guarded, so an error raised by the
    # test itself is not silently swallowed.
    try:
        from scipy.stats import chisquare
    except ImportError:
        chisquare = None
    if chisquare is not None:
        _, p = chisquare(counts)
        print("#chi-square test of uniformity. p: %.3g "
              "(low value means reject null of uniformity)" % p)

    print("#bin_start\tbin_end\tn")
    for (bin_start, bin_end), count in zip(pairwise(bins), counts):
        print("%.2f\t%.2f\t%i" % (bin_start, bin_end, count))
def create_acf_list(lags): acfs = [] for lag_min, lag_max in pairwise(lags): acfs.append((lag_min, lag_max, # array uses less memory than list. {"x": array("f"), "y": array("f")})) acfs.reverse() return acfs
def run(args):
    """Print summary statistics and a binned count table for one column.

    Lines whose first character is ``#`` are skipped. The remaining lines are
    split on tabs and the column selected by ``get_col_num(args.c)`` is parsed
    as floats.

    The report, written to stdout, consists of:
      * a ``#``-prefixed line of summary statistics,
      * a chi-square uniformity p-value on the bin counts (only when scipy
        can be imported),
      * a tab-separated table ``bin_start, bin_end, n``.

    Args:
        args: object exposing ``c`` (column spec passed to ``get_col_num``),
            ``file`` (input handed to ``ts.nopen``) and ``n`` (number of
            bins; a falsy value leaves numpy's default binning in place).
    """
    col_num = get_col_num(args.c)
    # NOTE(review): the handle returned by ts.nopen is never closed explicitly;
    # confirm whether it supports `with` before wrapping it.
    rows = (line.rstrip("\r\n").split("\t")
            for line in ts.nopen(args.file) if line[0] != "#")
    pvals = np.array([float(row[col_num]) for row in rows])

    kwargs = {"bins": args.n} if args.n else {}

    # Only the raw counts are reported. The earlier density-normalised
    # histogram and its labels were never used, and its `normed` keyword is
    # rejected by current NumPy, so that call is gone.
    counts, bins = np.histogram(pvals, **kwargs)

    print("# median: %.3f mean:%.3f; std: %.3f min:%.3f; max:%.3f" % (
        np.median(pvals), pvals.mean(), pvals.std(), pvals.min(),
        pvals.max()))

    # scipy is optional: only the import is guarded, so an error raised by the
    # test itself is not silently swallowed.
    try:
        from scipy.stats import chisquare
    except ImportError:
        chisquare = None
    if chisquare is not None:
        _, p = chisquare(counts)
        print("#chi-square test of uniformity. p: %.3g "
              "(low value means reject null of uniformity)" % p)

    print("#bin_start\tbin_end\tn")
    for (bin_start, bin_end), count in zip(pairwise(bins), counts):
        print("%.2f\t%.2f\t%i" % (bin_start, bin_end, count))