示例#1
0
def main(regions, bams, reads=None, flags="-F%i" % (0x100 | 0x4 | 0x200 | 0x400),
        pad=100):
    """Tabulate and plot on-/off-target read fractions for a set of BAMs.

    The intervals in `regions` are padded by `pad` on each side and written
    to a temporary file, which is then handed to `count_both` together with
    each BAM.  For every BAM the per-quality off/on counts are divided by a
    read total, plotted (saved as ``roc.png``) and written to stdout as a
    tab-separated table with the header ``qual  method  off  on``.

    Parameters
    ----------
    regions : str
        Passed to `reader(regions, header=False)`.  Rows whose first column
        starts with "@" or whose 2nd/3rd columns are not plain digits are
        skipped (e.g. header lines).
    bams : iterable
        Items passed one by one to `count_both` / `count_bam`; presumably
        BAM file paths -- confirm against the caller.
    reads : str, int, float or None
        How to obtain the denominator:
        * a number or a digit-only string -- used directly;
        * ``"bam"`` or ``None`` -- taken per BAM from `count_bam`;
        * anything else -- treated as a path given to ``bioawk -c fastx``;
          the record count is doubled (presumably for paired-end data --
          TODO confirm).
    flags : str
        Forwarded unchanged to `count_both` and `count_bam`.
    pad : int
        Number of bases subtracted from the start (clamped at 0) and added
        to the end of every region.
    """
    # Materialise once: `bams` is walked several times below, which would
    # silently yield nothing on the later passes for a one-shot iterator.
    bams = list(bams)

    # NamedTemporaryFile creates the file atomically; tempfile.mktemp() only
    # returns a name and is open to a race between naming and opening.
    padded = tempfile.NamedTemporaryFile(mode='w', delete=False)
    try:
        # Closing (not just flushing) guarantees the data is on disk before
        # other processes open the file by name.
        with padded:
            for toks in reader(regions, header=False):
                if toks[0].startswith("@") or not (toks[1] + toks[2]).isdigit():
                    continue
                toks[1] = str(max(0, int(toks[1]) - pad))
                toks[2] = str(int(toks[2]) + pad)
                padded.write("\t".join(toks) + "\n")
        regions = padded.name

        # The default `reads=None` used to crash on `None.isdigit()`;
        # it now behaves like "bam".
        use_bam_counts = reads is None or reads == "bam"
        total_reads = None
        if not use_bam_counts:
            if isinstance(reads, (int, float)):
                total_reads = reads
            elif reads.isdigit():
                total_reads = int(reads)
            else:
                # NOTE: the path is interpolated into a shell command
                # unquoted, so it must not contain shell metacharacters.
                total_reads = int(next(nopen(
                    "|bioawk -c fastx 'END { print NR }' %s" % reads))) * 2.0

        counts = dict(pmap(count_both, ((bam, regions, flags)
                                        for bam in bams)))

        bam_reads = {}
        if use_bam_counts:
            bam_reads = dict(pmap(count_bam, ((bam, flags) for bam in bams)))

        colors = cycle('rgbkmy')
        out = sys.stdout
        out.write("qual\tmethod\toff\ton\n")

        for bam in bams:
            nreads = float(bam_reads[bam] if use_bam_counts else total_reads)
            # counts[bam][0] holds off-target counts, counts[bam][1] on-target
            # counts, both indexed by quality value.
            off_counts, on_counts = counts[bam][0], counts[bam][1]
            # Use a larger marker when there are very few distinct values,
            # otherwise the points are hard to see.
            symbol = 'o' if len(set(off_counts)) < 3 else '.'
            pl.plot(off_counts / nreads, on_counts / nreads,
                    '%s%s' % (next(colors), symbol), label=name(bam))

            for qual in range(0, 256):
                off, on = off_counts[qual], on_counts[qual]
                if off + on == 0:
                    continue
                out.write("{qual}\t{bam}\t{off}\t{on}\n".format(
                    qual=qual, bam=name(bam),
                    off=off / nreads,
                    on=on / nreads))

        pl.xlabel('off target')
        pl.ylabel('on target')
        pl.legend(loc='lower right')
        pl.xlim(xmin=0)
        pl.ylim(ymin=0)
        pl.savefig('roc.png')

        sys.stderr.write("wrote %s\n" % getattr(out, "name", "<stdout>"))
    finally:
        # Remove the padded-regions file even when counting/plotting fails.
        os.unlink(padded.name)
示例#2
0
def model_clusters(clust_iter,
                   clin_df,
                   formula,
                   coef,
                   model_fn=gee_cluster,
                   pool=None,
                   transform=None,
                   n_cpu=None,
                   **kwargs):
    """Fit `model_fn` to every cluster and yield the results one by one.

    Each cluster becomes one job tuple
    ``(model_fn, formula, cluster, clin_df, coef, kwargs)`` that is handed
    to `wrapper` through `ts.pmap`.

    Parameters
    ----------
    clust_iter : iterable
        The clusters to model.
    clin_df : pandas.DataFrame
        Covariates referenced by `formula`.  An existing 'methylation'
        column is removed from this frame in place.
    formula : str
        R (patsy) style formula that must mention 'methylation', e.g.
        ``methylation ~ age + gender + race``.
    coef : str
        Name of the coefficient of interest, e.g. 'age'.
    model_fn : callable
        Function with signature
        ``fn(formula, methylation, covs, coef, kwargs)`` returning a
        dictionary that holds at least the p-value and the coefficient.
    pool : optional
        Forwarded to `ts.pmap` as its ``p`` argument.
    transform : callable, optional
        When truthy, each cluster is first passed through
        ``cluster_transform(cluster, transform)``.
    n_cpu : int, optional
        Forwarded positionally to `ts.pmap`.
    kwargs : dict
        Extra arguments delivered to `model_fn`.

    Yields
    ------
    Whatever `ts.pmap` produces for each job.
    """
    # Discard a pre-existing 'methylation' column; its absence is fine.
    try:
        clin_df.pop('methylation')
    except KeyError:
        pass

    # Decide once how a cluster is prepared before it is shipped off.
    if transform:
        def prepare(cluster):
            return cluster_transform(cluster, transform)
    else:
        def prepare(cluster):
            return cluster

    jobs = ((model_fn, formula, prepare(cluster), clin_df, coef, kwargs)
            for cluster in clust_iter)

    for result in ts.pmap(wrapper, jobs, n_cpu, p=pool):
        yield result
示例#3
0
def model_clusters(clust_iter,
                   clin_df,
                   model_str,
                   coef,
                   model_fn=gee_cluster,
                   n_cpu=None,
                   **kwargs):
    """Yield the `ts.pmap` result for every cluster in `clust_iter`.

    One job tuple ``(model_fn, model_str, cluster, clin_df, coef, kwargs)``
    is built per cluster and dispatched to `wrapper`; `n_cpu` is forwarded
    positionally to `ts.pmap`.
    """
    jobs = (
        (model_fn, model_str, cluster, clin_df, coef, kwargs)
        for cluster in clust_iter
    )
    results = ts.pmap(wrapper, jobs, n_cpu)
    for result in results:
        yield result
示例#4
0
def model_clusters(clust_iter, clin_df, formula, coef, model_fn=gee_cluster,
        pool=None,
        transform=None,
        n_cpu=None,
        **kwargs):
    """Evaluate the chosen model on each cluster, yielding results lazily.

    Parameters
    ----------

    clust_iter : iterable
        Source of clusters; one job is created per item.

    clin_df : pandas.DataFrame
        Holds the covariates named in `formula`.  Note that a column
        called 'methylation', if present, is popped from it in place.

    formula : str
        R (patsy) style formula containing 'methylation', for example:
        methylation ~ age + gender + race

    coef : str
        Coefficient of interest in the model, for example 'age'.

    model_fn : fn
        Called as fn(formula, methylation, covs, coef, kwargs); expected
        to return a dictionary with at least a p-value and coef.

    pool :
        Passed through to `ts.pmap` as keyword ``p``.

    transform : fn
        Optional function that modifies the data before modeling; applied
        via `cluster_transform(cluster, transform)`.

    n_cpu : int
        Passed through positionally to `ts.pmap`.

    kwargs : dict
        Arguments forwarded to `model_fn`.
    """
    # Remove any existing 'methylation' column; a missing one is not an error.
    try:
        clin_df.pop('methylation')
    except KeyError:
        pass

    # Lazily apply the optional transform so clusters are still streamed.
    if transform:
        clusters = (cluster_transform(c, transform) for c in clust_iter)
    else:
        clusters = clust_iter

    job_args = ((model_fn, formula, c, clin_df, coef, kwargs)
                for c in clusters)
    modeled = ts.pmap(wrapper, job_args, n_cpu, p=pool)
    for res in modeled:
        yield res
示例#5
0
def model_clusters(clust_iter, clin_df, model_str, coef, model_fn=gee_cluster, n_cpu=None, **kwargs):
    """Dispatch one `wrapper` job per cluster through `ts.pmap` and yield each result.

    Every job is the tuple ``(model_fn, model_str, cluster, clin_df, coef,
    kwargs)``; `n_cpu` is passed positionally to `ts.pmap`.
    """
    def build_jobs():
        # Generated lazily so clusters are consumed only as pmap asks for them.
        for cluster in clust_iter:
            yield (model_fn, model_str, cluster, clin_df, coef, kwargs)

    for outcome in ts.pmap(wrapper, build_jobs(), n_cpu):
        yield outcome