def get_expression(**kwargs):
    """Return a binary assay-by-gene presence matrix for "wormtile" peaks.

    Peaks are fetched with ``wp.get_simple_thr`` using ``atype="wormtile"``,
    ``dthr=3000``, ``dsign=-1`` and ``sthr=1e-5`` (merged into ``kwargs`` via
    ``mem.rc``).  Each entry of the result maps an assay key to a record whose
    ``"gnames"`` field lists gene names, possibly with repeats.

    Args:
        **kwargs: forwarded (through ``mem.rc``) to ``wp.get_simple_thr`` and
            ``wp.gene_info``.

    Returns:
        Integer numpy array of shape ``(n_assays, n_genes)``.  Entry
        ``[r, c]`` is 1 when gene ``c`` appears at least once in the
        ``"gnames"`` of assay ``r`` and 0 otherwise.  Rows follow the
        iteration order of the peak mapping; columns are the union of all
        gene names ordered by ``gene_info[name]["genomestart"]``, with ties
        broken by gene name so the layout is reproducible.

    Raises:
        KeyError: if a gene name is missing from ``wp.gene_info`` or lacks a
            ``"genomestart"`` entry.
    """
    simple = wp.get_simple_thr(
        **mem.rc(kwargs, atype="wormtile", dthr=3000, dsign=-1, sthr=1e-5)
    )

    # Distinct gene names per assay; repeats do not matter for a 0/1 matrix.
    genes_by_assay = {
        assay: set(props["gnames"]) for assay, props in simple.items()
    }
    gene_union = set()
    for names in genes_by_assay.values():
        gene_union.update(names)

    # NOTE(review): the offsets were fetched but never used by this function.
    # The call is kept only in case it has side effects (e.g. warming a
    # cache) -- confirm and drop it if it is a pure getter.
    wp.chromosome_offsets()
    gene_info = wp.gene_info(**mem.rc(kwargs))

    # Column index of each gene = its rank by genome start coordinate.
    genome_coords = {name: gene_info[name]["genomestart"] for name in gene_union}
    ordered_genes = sorted(gene_union, key=lambda name: (genome_coords[name], name))
    gene_cols = {name: col for col, name in enumerate(ordered_genes)}

    presence = zeros((len(simple), len(gene_union)), dtype=int)
    for row, assay in enumerate(simple.keys()):
        for name in genes_by_assay[assay]:
            presence[row, gene_cols[name]] = 1
    return presence
def set_easy0(**kwargs):
    """Build weighted ``(tf id, gene id, weight)`` edges from thresholded peaks.

    NOTE: this function is currently disabled.  An unconditional
    ``raise Exception()`` follows the peak lookup, so the code after it never
    runs and no value is ever returned.  The remaining body is kept, with its
    gene/score pairing corrected, for when the guard is removed.

    Intended behaviour of the disabled body, per the code below:
      * transcription factors missing from ``wu.symbol_ids()`` are skipped;
      * peaks with ``log10(score)`` below ``score_soft_cut`` (-136) get
        weight 1;
      * peaks strictly between ``score_soft_cut`` and ``score_hard_cut``
        (-90) get a linear weight that goes from 1 at the soft cut to 0 at
        the hard cut;
      * each edge is ``(sids[tf], gene.qualifiers["db_xref"][1][9:], weight)``.

    Args:
        **kwargs: only ``"atype"`` is read; it is passed to
            ``wp.get_simple_thr`` together with ``dthr=1500``, ``dsign=-1``
            and ``sthr=1e-4``.

    Raises:
        Exception: always, immediately after the peak lookup.
    """
    atype = kwargs.get("atype")
    simple = wp.get_simple_thr(atype=atype, dthr=1500, dsign=-1, sthr=1e-4)
    # NOTE(review): deliberate-looking kill switch; everything below is
    # unreachable until it is removed.
    raise Exception()

    score_soft_cut = -136
    score_hard_cut = -90
    ramp_width = float(score_soft_cut - score_hard_cut)
    sids = wu.symbol_ids()

    weighted = []
    for tf, props in simple.items():
        # for now, remove tfs that are not mappable
        if tf not in sids:
            continue
        order = argsort(props["scores"])
        lscores = log10(props["scores"][order])
        easy = nonzero(less(lscores, score_soft_cut))[0]
        medium = nonzero(
            greater(lscores, score_soft_cut) & less(lscores, score_hard_cut)
        )[0]
        genes = props["genes"]
        # `easy`/`medium` are positions in the *sorted* score array, so map
        # them back through `order` before indexing the unsorted gene list;
        # indexing `genes` directly would pair genes with the wrong scores.
        # Assumes props["genes"] is parallel to props["scores"] -- TODO confirm.
        weighted.extend(
            (tf, genes[order[i]], (lscores[i] - score_hard_cut) / ramp_width)
            for i in medium
        )
        weighted.extend((tf, genes[order[i]], 1) for i in easy)

    return [
        (sids[tf], gene.qualifiers["db_xref"][1][9:], weight)
        for tf, gene, weight in weighted
    ]
def peak_thr_histograms(**kwargs):
    """Plot histograms of peak distance and peak score for thresholded peaks.

    Peaks are fetched with ``wp.get_simple_thr`` using ``dthr=1500``,
    ``sthr=1e-2`` and ``dsign=-1`` (merged into ``kwargs`` via ``mem.rc``).
    Distances are counted in bins of width 50 starting at ``-dthr``; scores
    are counted in unit-wide bins of ``log10(score)``.  The log10 of both
    count vectors is plotted side by side on figure 1.

    Two files are written through ``myplots.figpath``:
    ``chip_simple_distance.pdf`` is saved while only the left (distance)
    panel exists, and ``chip_simple_scores.pdf`` is saved after the right
    (score) panel has been added to the same figure.

    Args:
        **kwargs: forwarded (through ``mem.rc``) to ``wp.get_simple_thr``.

    Raises:
        ValueError: if the peak lookup returns no entries.
    """
    dthr = 1500
    sthr = 1e-2
    dsign = -1
    simple = wp.get_simple_thr(**mem.rc(kwargs, dthr=dthr, sthr=sthr, dsign=dsign))
    if not simple:
        # Previously this fell through to log10(-1) and failed with an
        # opaque NaN-to-int conversion error.
        raise ValueError('no thresholded peaks to histogram')

    min_score = min(np.min(v['scores']) for v in simple.values())
    max_score = max(np.max(v['scores']) for v in simple.values())

    # Score bins: one per integer decade of log10(score).
    lo_exp = int(floor(log10(min_score)))
    hi_exp = int(ceil(log10(max_score)))
    sbin_mids = list(range(lo_exp, hi_exp + 1))
    sbins = zeros(len(sbin_mids))

    # Distance bins: values are the left edge of each 50-wide bin.
    dbin_size = 50
    dbin_mids = list(range(-dthr, dthr, dbin_size))
    dbins = zeros(len(dbin_mids))

    for v in simple.values():
        for d in v['dists']:
            # Floor division keeps the index an int on Python 2 and 3.
            dbins[int(d + dthr) // dbin_size] += 1
        for s in v['scores']:
            sbins[int(log10(s) - lo_exp)] += 1

    f = myplots.fignum(1, (10, 6))

    ax = f.add_subplot(121)
    ax.set_ylabel('log 10 counts')
    ax.set_xlabel('distance')
    ax.set_title('simplified tss distances (d<{0})'.format(dthr))
    # Empty bins give log10(0) = -inf.
    ax.plot(dbin_mids, log10(dbins), color='black')
    f.savefig(myplots.figpath('chip_simple_distance.pdf'))

    ax = f.add_subplot(122)
    ax.set_title('simplified tss scores (s<{0:2.2})'.format(sthr))
    ax.set_ylabel('log10 counts')
    ax.set_xlabel('log10 peak score')
    ax.plot(sbin_mids, log10(sbins), color='black')
    f.savefig(myplots.figpath('chip_simple_scores.pdf'))