def main(regions, bams, reads=None, flags="-F%i" % (0x100 | 0x4 | 0x200 | 0x400),
         pad=100):
    """Tabulate and plot on-/off-target read fractions for each BAM.

    The intervals in `regions` are widened by `pad` on both sides and written
    to a temporary file.  `count_both` is then run (through `pmap`) for every
    BAM against that file.  For each BAM and each quality value in 0..255 with
    a non-zero count, one row ``qual, method, off, on`` is written to stdout,
    where `off` and `on` are the counts divided by the read total.  A plot of
    on-target against off-target fractions is saved to ``roc.png``.

    Parameters
    ----------
    regions : str
        Path handed to `reader`.  Rows whose first field starts with "@" or
        whose 2nd/3rd fields are not purely digits are skipped.
    bams : iterable of str
        BAM paths; each is passed to `count_both` (and to `count_bam` when
        `reads` is "bam").
    reads : None, int, float or str
        The denominator used to normalise counts:

        * a number, or a string of digits: used directly;
        * ``"bam"`` or ``None``: obtained per BAM from `count_bam`;
        * any other string: taken to be a sequence file; its record count
          (via ``bioawk``) is doubled.
          NOTE(review): the doubling presumably accounts for paired-end
          mates -- confirm.

        Previously the default ``None`` raised ``AttributeError``; it now
        falls back to the ``"bam"`` behaviour.
    flags : str
        Passed unchanged to `count_both` / `count_bam`.
        NOTE(review): presumably a samtools-style exclusion filter -- confirm.
    pad : int
        Number of bases added on each side of every region (start is clamped
        at 0).
    """
    # NamedTemporaryFile creates the file atomically (mktemp is race-prone);
    # delete=False because the path is handed to other workers after closing.
    r2 = tempfile.NamedTemporaryFile(mode='w', delete=False)
    try:
        # Closing the handle (rather than only flushing) guarantees the padded
        # regions are on disk before count_both reads them.
        with r2:
            for toks in reader(regions, header=False):
                if toks[0][0] == "@" or not (toks[1] + toks[2]).isdigit():
                    continue
                toks[1] = str(max(0, int(toks[1]) - pad))
                toks[2] = str(int(toks[2]) + pad)
                r2.write("\t".join(toks) + "\n")
        regions = r2.name

        if reads is None:
            # No explicit total given: count the reads in each BAM instead.
            reads = "bam"
        if isinstance(reads, (int, float)):
            pass  # already a usable number
        elif reads.isdigit():
            reads = int(reads)
        elif reads != "bam":
            # NOTE(review): `reads` is interpolated into a shell command
            # unquoted; paths with spaces or shell metacharacters will break
            # (or be interpreted by the shell).
            cmd = "|bioawk -c fastx 'END { print NR }' %s" % reads
            reads = int(next(nopen(cmd))) * 2.0

        colors = cycle('rgbkmy')
        bam_reads = {}

        counts = dict(pmap(count_both, ((bam, regions, flags) for bam in bams)))
        if reads == "bam":
            bam_reads = dict(pmap(count_bam, ((bam, flags) for bam in bams)))

        out = sys.stdout
        out.write("qual\tmethod\toff\ton\n")

        for bam in bams:
            nreads = float(bam_reads[bam] if reads == "bam" else reads)
            off_counts, on_counts = counts[bam][0], counts[bam][1]

            # Use a large marker when there are fewer than 3 distinct
            # off-target values, so sparse curves remain visible.
            symbol = 'o' if len(set(off_counts)) < 3 else '.'
            # assumes the counts support element-wise division (e.g. numpy
            # arrays) -- TODO confirm against count_both
            pl.plot(off_counts / nreads, on_counts / nreads,
                    '%s%s' % (next(colors), symbol), label=name(bam))

            for qual in range(0, 256):
                off, on = off_counts[qual], on_counts[qual]
                if off + on == 0:
                    continue
                out.write("{qual}\t{bam}\t{off}\t{on}\n".format(
                    qual=qual, bam=name(bam), off=off / nreads, on=on / nreads))

        pl.xlabel('off target')
        pl.ylabel('on target')
        pl.legend(loc='lower right')
        pl.xlim(xmin=0)
        pl.ylim(ymin=0)
        pl.savefig('roc.png')
    finally:
        # Remove the padded-regions file even if counting or plotting failed.
        os.unlink(r2.name)
    sys.stderr.write("wrote %s\n" % out.name)
def model_clusters(clust_iter, clin_df, formula, coef, model_fn=gee_cluster,
                   pool=None, transform=None, n_cpu=None, **kwargs):
    """Run the chosen model on every cluster and yield one result per cluster.

    Parameters
    ----------
    clust_iter : iterable
        iterable of clusters

    clin_df : pandas.DataFrame
        Contains covariates from `formula`.  A 'methylation' column, if
        present, is removed from this frame in place before modeling.

    formula : str
        R (patsy) style formula. Must contain 'methylation': e.g.:
        methylation ~ age + gender + race

    coef : str
        The coefficient of interest in the model, e.g. 'age'

    model_fn : fn
        A function with signature
        fn(formula, methylation, covs, coef, kwargs)
        that returns a dictionary with at least p-value and coef

    pool : object
        Forwarded to `ts.pmap` as its `p` keyword argument.

    transform : fn
        A function that modifies the data before modeling.  When given, each
        cluster is passed through `cluster_transform(cluster, transform)`
        first; otherwise clusters are used unchanged.

    n_cpu : int
        Forwarded to `ts.pmap` as its third positional argument.

    kwargs : dict
        arguments sent to `model_fn`
    """
    # Make sure no 'methylation' column is carried in with the covariates.
    try:
        clin_df.pop('methylation')
    except KeyError:
        pass

    def prepare(cluster):
        # Apply the optional transform; pass the cluster through otherwise.
        if transform:
            return cluster_transform(cluster, transform)
        return cluster

    # One argument tuple per cluster, built lazily as pmap consumes them.
    jobs = ((model_fn, formula, prepare(cluster), clin_df, coef, kwargs)
            for cluster in clust_iter)

    for result in ts.pmap(wrapper, jobs, n_cpu, p=pool):
        yield result
def model_clusters(clust_iter, clin_df, model_str, coef, model_fn=gee_cluster,
                   n_cpu=None, **kwargs):
    """Yield the result of running `wrapper` on each cluster via `ts.pmap`.

    Parameters
    ----------
    clust_iter : iterable
        iterable of clusters

    clin_df : object
        Passed through unchanged in every job tuple.
        NOTE(review): presumably the covariate DataFrame -- confirm.

    model_str : str
        Passed through unchanged in every job tuple.
        NOTE(review): presumably the model formula -- confirm.

    coef : str
        Passed through unchanged in every job tuple.

    model_fn : fn
        First element of every job tuple handed to `wrapper`.

    n_cpu : int
        Forwarded to `ts.pmap` as its third positional argument.

    kwargs : dict
        Collected keyword arguments, passed as the last element of every job
        tuple.
    """
    # One argument tuple per cluster, built lazily as pmap consumes them.
    work_items = ((model_fn, model_str, cluster, clin_df, coef, kwargs)
                  for cluster in clust_iter)

    for outcome in ts.pmap(wrapper, work_items, n_cpu):
        yield outcome
def model_clusters(clust_iter, clin_df, formula, coef, model_fn=gee_cluster,
                   pool=None, transform=None, n_cpu=None, **kwargs):
    """Evaluate the chosen model for each cluster, yielding each result.

    Parameters
    ----------
    clust_iter : iterable
        iterable of clusters

    clin_df : pandas.DataFrame
        Contains covariates from `formula`.  Any existing 'methylation'
        column is popped from this frame (in place) before modeling.

    formula : str
        R (patsy) style formula. Must contain 'methylation': e.g.:
        methylation ~ age + gender + race

    coef : str
        The coefficient of interest in the model, e.g. 'age'

    model_fn : fn
        A function with signature
        fn(formula, methylation, covs, coef, kwargs)
        that returns a dictionary with at least p-value and coef

    pool : object
        Handed to `ts.pmap` through its `p` keyword.

    transform : fn
        A function that modifies the data before modeling; applied to each
        cluster through `cluster_transform` when truthy.

    n_cpu : int
        Handed to `ts.pmap` as its third positional argument.

    kwargs : dict
        arguments sent to `model_fn`
    """
    try:
        clin_df.pop('methylation')
    except KeyError:
        # Nothing to remove: the frame had no 'methylation' column.
        pass

    # Choose once how each cluster is prepared for modeling.
    if transform:
        def as_model_input(cluster):
            return cluster_transform(cluster, transform)
    else:
        def as_model_input(cluster):
            return cluster

    arg_tuples = (
        (model_fn, formula, as_model_input(cluster), clin_df, coef, kwargs)
        for cluster in clust_iter
    )
    for res in ts.pmap(wrapper, arg_tuples, n_cpu, p=pool):
        yield res