Пример #1
0
def get_expression(**kwargs):
    '''Build a binary assay-by-gene presence matrix for the "wormtile" assays.

    Data comes from ``wp.get_simple_thr`` called with ``atype="wormtile"``,
    ``dthr=3000``, ``dsign=-1`` and ``sthr=1e-5`` (combined with ``kwargs``
    through ``mem.rc``).  Every entry of the result is expected to carry a
    ``"gnames"`` sequence of hashable gene names.

    Returns:
        A 2-D array of 0/1 values with one row per key of the thresholded
        result (in that mapping's iteration order) and one column per
        distinct gene name.  Columns are ordered by ascending
        ``gene_info[name]["genomestart"]``.  An entry is 1 when the gene
        name appears at least once in that row's ``"gnames"``.

    Raises:
        KeyError: if a gene name is missing from ``wp.gene_info(...)`` or
            has no ``"genomestart"`` entry.
    '''
    atype = "wormtile"
    simple = wp.get_simple_thr(**mem.rc(kwargs, atype=atype, dthr=3000, dsign=-1, sthr=1e-5))

    # Distinct gene names per assay; only presence matters for the result,
    # so duplicates within an assay are collapsed here.
    genes = {k: set(v["gnames"]) for k, v in simple.items()}
    gene_union = set()
    for names in genes.values():
        gene_union.update(names)

    # NOTE(review): the return value was never used by this function; the
    # call is kept only in case it has side effects (e.g. priming a cache).
    # Confirm and drop if it is a pure getter.
    wp.chromosome_offsets()
    gene_info = wp.gene_info(**mem.rc(kwargs))

    # Column index of a gene == its rank when genes are sorted by genome start.
    ordered = sorted(gene_union, key=lambda name: gene_info[name]["genomestart"])
    gene_col = {name: col for col, name in enumerate(ordered)}

    gc = zeros((len(simple), len(gene_union)))
    # Iterate ``simple`` itself so row order matches ``simple.keys()``.
    for row, assay in enumerate(simple):
        for name in genes[assay]:
            gc[row, gene_col[name]] = 1
    return 1 * greater(gc, 0)
Пример #2
0
    def set_easy0(**kwargs):
        '''Fetch thresholded data for ``kwargs["atype"]`` and then always raise.

        As written, the function calls ``wp.get_simple_thr`` with
        ``dthr=1500``, ``dsign=-1``, ``sthr=1e-4`` and the ``atype`` taken
        from ``kwargs`` (``None`` when absent; all other keyword arguments
        are ignored), and then unconditionally raises a bare ``Exception``.

        Everything after the ``raise`` is unreachable.  If it ran, it would
        build a list of ``(source_id, target_id, weight)`` edges:

        * entries whose key is not in ``wu.symbol_ids()`` are skipped;
        * log10 scores strictly below ``score_soft_cut`` get weight 1;
        * log10 scores strictly between the soft and hard cuts get a weight
          that falls linearly from 1 (at the soft cut) to 0 (at the hard
          cut);
        * scores exactly equal to a cut, or above the hard cut, yield no
          edge.

        Raises:
            Exception: always, right after the ``wp.get_simple_thr`` call.
        '''
        atype = kwargs.get("atype")
        simple = wp.get_simple_thr(atype=atype, dthr=1500, dsign=-1, sthr=1e-4)
        # NOTE(review): unconditional raise -- the rest of this function is
        # dead code.  Presumably a deliberate "disabled" marker; confirm
        # before removing it.
        raise Exception()
        # Cut-offs are compared against log10(score) below.
        score_soft_cut = -136
        score_hard_cut = -90
        sids = wu.symbol_ids()
        prop_tuples = []
        for tf, props in simple.iteritems():
            # For now, skip entries whose key has no id in ``sids``.
            if not tf in sids.keys():
                continue
            # Indices that sort the scores ascending, then log10 of the
            # sorted scores.
            ssrt = argsort(props["scores"])
            lscores = log10(props["scores"][ssrt])

            # Positions (within the *sorted* scores) below the soft cut, and
            # strictly between the soft and hard cuts.
            easy = nonzero(less(lscores, score_soft_cut))[0]
            medium = nonzero(greater(lscores, score_soft_cut) * less(lscores, score_hard_cut))[0]
            # generous_edges = concatenate([easy,medium])
            # NOTE(review): ``g`` indexes the sorted ``lscores`` but is used
            # on ``props["genes"]`` without applying ``ssrt`` -- looks like a
            # gene/score mismatch unless the genes are already ordered by
            # score; confirm.
            # Weight is (hard - lscore) / (hard - soft), i.e. between 0 and 1.
            prop_tuples.append(
                [
                    (tf, props["genes"][g], -(score_hard_cut - lscores[g]) / (score_soft_cut - score_hard_cut))
                    for g in medium
                ]
            )
            prop_tuples.append([(tf, props["genes"][g], 1) for g in easy])

        # Flatten the per-entry lists into one array of (key, gene, weight).
        edgelist = array(list(it.chain(*prop_tuples)))
        # Map the key through ``sids`` and take the gene id from the second
        # ``db_xref`` qualifier with its first 9 characters stripped.
        edges = [(sids[e[0]], e[1].qualifiers["db_xref"][1][9:], e[2]) for e in edgelist]
        return edges
Пример #3
0
def peak_thr_histograms(**kwargs):
    '''Plot log10-count histograms of simplified peak distances and scores.

    Data comes from ``wp.get_simple_thr`` called with ``dthr=1500``,
    ``sthr=1e-2`` and ``dsign=-1`` (combined with ``kwargs`` through
    ``mem.rc``).  Every entry is expected to carry ``'dists'`` and
    ``'scores'`` sequences; scores must be positive because their log10 is
    taken.

    Distances are counted in bins of width 50 starting at ``-dthr``; scores
    are counted in unit-wide log10 bins spanning the observed score range.
    Both histograms are drawn as log10(count) on one figure, which is saved
    twice: to ``chip_simple_distance.pdf`` after the first subplot is drawn
    and to ``chip_simple_scores.pdf`` after the second.

    Returns:
        None.

    Raises:
        ValueError: if the thresholded result is empty.
    '''
    dthr = 1500
    sthr = 1e-2
    dsign = -1
    simple = wp.get_simple_thr(**mem.rc(kwargs, dthr=dthr, sthr=sthr, dsign=dsign))

    if not simple:
        # Previously this surfaced as a ValueError from int(nan); keep the
        # exception type but say what actually went wrong.
        raise ValueError('no thresholded peaks to histogram')

    # Global score range across all entries.
    min_score = min(np.min(v['scores']) for v in simple.values())
    max_score = max(np.max(v['scores']) for v in simple.values())

    # One score bin per integer log10 value in [floor(min), ceil(max)].
    lrange = [int(floor(log10(min_score))), int(ceil(log10(max_score)))]
    sbin_mids = list(range(lrange[0], lrange[1] + 1))
    sbins = zeros(len(sbin_mids))

    # Distance bins are labelled by their left edge.
    dbin_size = 50
    dbin_mids = list(range(-dthr, dthr, dbin_size))
    dbins = zeros(len(dbin_mids))

    for v in simple.values():
        for d in v['dists']:
            # Floor division: a true-division result is a float and cannot
            # be used as an array index.
            dbins[int(d + dthr) // dbin_size] += 1
        for s in v['scores']:
            sbins[int(log10(s) - lrange[0])] += 1

    f = myplots.fignum(1, (10, 6))
    ax = f.add_subplot(121)
    ax.set_ylabel('log 10 counts')
    ax.set_xlabel('distance')
    ax.set_title('simplified tss distances (d<{0})'.format(dthr))
    ax.plot(dbin_mids, log10(dbins), color='black')
    f.savefig(myplots.figpath('chip_simple_distance.pdf'))

    ax = f.add_subplot(122)
    ax.set_title('simplified tss scores (s<{0:2.2})'.format(sthr))
    ax.set_ylabel('log10 counts')
    ax.set_xlabel('log10 peak score')
    ax.plot(sbin_mids, log10(sbins), color='black')
    f.savefig(myplots.figpath('chip_simple_scores.pdf'))