def view_in():
    """Plot the axis-0 sums of ``nu.net_affinity()`` in ascending order.

    The sums are drawn with ``sm.seismic`` on a freshly cleared figure 0 and
    the axes are titled 'In degree, sorted'.  Nothing is returned.
    """
    affinity = nu.net_affinity()

    fig = plt.figure(0)
    fig.clear()
    axes = fig.add_subplot(111)

    # Summing along axis 0 gives one total per column; the plot title calls
    # these totals the in-degree.
    in_degree = sum(affinity, 0)
    ascending = argsort(in_degree)

    sm.seismic([in_degree[ascending]], ax=axes)
    myplots.maketitle(axes, 'In degree, sorted')
def tree_similarity(dist1, dist2, run_id, criterion='knn', k=6):
    """Compare the k-nearest-neighbour structure of two distance matrices.

    For every row ``i`` the ``k`` nearest neighbours under ``dist1`` and under
    ``dist2`` are collected.  The function then plots, on figure 4:

    * left axes  -- per-row absolute differences ``|dist1 - dist2|`` over the
      selected neighbours (via ``seismic.seismic``);
    * right axes -- a scatter of ``dist1`` against ``dist2`` for those
      neighbours, annotated with the R-squared of a linear regression over
      all points and the mean Jaccard index of the two neighbour sets.

    The figure is saved to
    ``cfg.dataPath('figs/gpm2/pt2_mus_cm_tree_dists_<run_id>_k<k>.tiff')``.

    Parameters
    ----------
    dist1, dist2 : 2-D arrays indexed as ``dist[i, n]``.
        Presumably square pairwise-distance matrices over the same items --
        TODO confirm against the caller.
    run_id : value interpolated into the output file name.
    criterion : only ``'knn'`` is implemented; any other value makes the
        function return immediately without plotting or saving.
    k : number of neighbours kept per row.

    Returns
    -------
    None
    """
    if criterion != 'knn':
        # No other comparison is implemented; stay a silent no-op.
        return

    nq = len(dist1)

    # Column 0 of each sorted row is skipped -- presumably the item itself at
    # distance zero (TODO confirm the diagonal really is the minimum).
    nb1 = argsort(dist1, 1)[:, 1:k + 1]
    nb2 = argsort(dist2, 1)[:, 1:k + 1]

    neighbour_pairs = list(zip(nb1, nb2))
    all_nbs = [set(n1).union(set(n2)) for n1, n2 in neighbour_pairs]
    nb_intersection = [set(n1).intersection(set(n2))
                       for n1, n2 in neighbour_pairs]

    # For every row: the (dist1, dist2) pair of each neighbour in the union.
    pair_dists = [array([[dist1[i, n], dist2[i, n]] for n in nbs])
                  for i, nbs in enumerate(all_nbs)]

    # Keep, per row, the k pairs whose smaller distance is smallest.  The
    # union always holds at least k neighbours, so the result is a regular
    # (nq, k, 2) array.
    nb_dists = array([sorted(pairs, key=lambda x: min(x))[:k]
                      for pairs in pair_dists])

    abs_diffs = [abs(diff(elt, 1).flatten()) for elt in nb_dists]

    ct = mycolors.getct(nq)
    f = myplots.fignum(4, (10, 8))

    ax = f.add_axes([.05, .08, .25, .87])
    seismic.seismic(abs_diffs, ax=ax, colors=ct)

    # Mean over rows of |intersection| / |union| of the two neighbour sets.
    jaccard = mean([float(len(common)) / float(len(union))
                    for common, union in zip(nb_intersection, all_nbs)])

    ax2 = f.add_axes([.34, .08, .6, .87])
    for i, d in enumerate(nb_dists):
        ax2.scatter(d[:, 0], d[:, 1], 20, alpha=.5, color=ct[i])

    lin = linregress(nb_dists[:, :, 0].flatten(),
                     nb_dists[:, :, 1].flatten())
    rsquared = lin[2] ** 2  # lin[2] is the correlation coefficient r

    ax2.annotate('NN dists for multi/struct-aligned trees.\nK = {0}'.format(k),
                 [0, 1],
                 xycoords='axes fraction', va='top',
                 xytext=[10, -10], textcoords='offset pixels')
    ax2.annotate('R-Squared: {0:3.3}\nJaccard Index: {1:3.3}'.format(rsquared,
                                                                    jaccard),
                 [1, 0],
                 xycoords='axes fraction', ha='right',
                 xytext=[-10, 10], textcoords='offset pixels')
    ax2.set_xlabel('Muscle aligned tree distances')
    ax2.set_ylabel('Struct algined tree distances')

    datafile = cfg.dataPath(
        'figs/gpm2/pt2_mus_cm_tree_dists_{0}_k{1}.tiff'.format(run_id, k))
    f.savefig(datafile)
def cluster_exprs(all_members, ct_data, do_plot=False, cluster_type='4d',
                  cluster_id=4):
    """Export per-tissue, per-time expression values for one clustering.

    The cluster assignment ``all_members[cluster_id]`` splits ``ct_data``
    into "tissues" (one per distinct label).  For every tissue and every
    distinct value found in column 1 of its rows (called ``time`` here), the
    function

    * looks up the matching entries of every ``mrnas[...]['vals']`` array and
      of the ``x``/``y``/``z`` arrays returned by ``nio.getBDTNP(misc=True)``;
    * scatters X-Z and Y-Z views on figure 1;
    * writes the expression dict with ``sio.savemat`` and the figure with
      ``savefig`` under ``cfg.dataPath('soheil/...')``.

    Parameters
    ----------
    all_members : indexable by ``cluster_id``; each entry is an array of
        cluster labels aligned with the rows of ``ct_data``.
    ct_data : array whose rows are used as index tuples into the BDTNP value
        arrays; column 1 is compared against ``range(6)`` and used as the
        time key.  (Exact meaning of the columns is not visible here --
        TODO confirm.)
    do_plot : when true, draw a summary of per-tissue counts on figure 1
        before the export loop.
    cluster_type, cluster_id : interpolated into the output file names;
        ``cluster_id`` also selects the clustering.

    Raises
    ------
    Exception
        Unconditionally, as the last statement.  NOTE(review): this looks
        like a leftover debugging stop; it is kept because callers may rely
        on it -- confirm and remove.
    """
    mrnas = nio.getBDTNP()
    misc = nio.getBDTNP(misc=True)

    c = all_members[cluster_id]
    c_unq = set(list(c))
    tissues = dict(('t_{0}'.format(i), dict(cts=ct_data[equal(c, elt)]))
                   for i, elt in enumerate(c_unq))

    nt = 6  # number of distinct column-1 values that are counted
    counts = array([[sum(equal(v['cts'][:, 1], t)) for t in range(nt)]
                    for v in tissues.values()])

    if do_plot:
        f = plt.figure(1)
        f.clear()
        ax1 = f.add_subplot('121')
        ax2 = f.add_subplot('122')
        seismic.seismic(counts, ax=ax1, stacked=True,
                        colors=mycolors.getct(len(counts)))
        ax2.hist(np.sum(counts, 1))

    all_exprs = {}
    for t, v in tissues.iteritems():
        ct_all = v['cts']
        for time in set([row[1] for row in ct_all]):
            ct = [row for row in ct_all if row[1] == time]
            # Transpose the selected rows into per-axis index sequences; the
            # same index is applied to every value array below.
            idx = zip(*ct)

            exprs = dict((k, elt['vals'][idx])
                         for k, elt in mrnas.iteritems())
            ys = misc['y']['vals'][idx]
            zs = misc['z']['vals'][idx]
            xs = misc['x']['vals'][idx]

            f = plt.figure(1)
            f.clear()
            ax1 = f.add_subplot(
                '121', title='X-Z axis view for tissue {0}'.format(t))
            ax2 = f.add_subplot(
                '122', title='Y-Z axis view for tissue {0}'.format(t))
            ax1.scatter(xs, zs)
            ax2.scatter(ys, zs)

            v['exprs'] = exprs
            all_exprs['tiss_{0}_time_{1}'.format(t, time)] = exprs

            # Binary mode + context managers: the handles used to be opened
            # in text mode and were never closed.
            mat_path = cfg.dataPath(
                'soheil/expression_c{0}_n{1}_tissue{2}_time{3}.mat'.format(
                    cluster_type, cluster_id, t, time))
            with open(mat_path, 'wb') as mat_file:
                sio.savemat(mat_file, exprs)

            # NOTE(review): savefig receives a file object, so matplotlib
            # cannot infer the format from the '.tiff' name and presumably
            # falls back to its default format -- confirm the output type.
            fig_path = cfg.dataPath(
                'soheil/expression_c{0}_n{1}_tissue{2}_time{3}.tiff'.format(
                    cluster_type, cluster_id, t, time))
            with open(fig_path, 'wb') as fig_file:
                f.savefig(fig_file)

    # NOTE(review): ``sub`` iterates over the per-gene value arrays of one
    # tissue/time entry and is then indexed with the tissue/time key ``k``;
    # this looks unfinished and may fail at runtime -- confirm the intended
    # inter-cluster summary before relying on this file.
    exprs_out = dict((k, [mean(sub[k]) for sub in all_exprs[k].values()])
                     for k in all_exprs.keys())
    out_path = cfg.dataPath(
        'soheil/expression_c{0}_n{1}_intercluster.mat'.format(
            cluster_type, cluster_id))
    with open(out_path, 'wb') as out_file:
        sio.savemat(out_file, exprs_out)

    raise Exception()
def _sig_grid_boxplot_subdir(filter_rows_and_cols, bp_zeros, bp_means, bp_logs):
    """Return the boxplot output sub-directory (with trailing '/') for a flag set."""
    if filter_rows_and_cols:
        return 'nonzero_exps_and_tfs_cells_log/'
    return '{0}_{1}_{2}/'.format('zeros' if bp_zeros else 'nozeros',
                                 'means' if bp_means else 'cells',
                                 'log' if bp_logs else 'nolog')


def sig_grid(num=1, method='tree', reset=False, plot_kcs=True,
             bp_means=False, bp_zeros=True, zero_ofs=1e-6, bp_logs=True,
             show_kos=False, filter_rows_and_cols=False):
    """Draw heatmap, boxplot and summary figures of a significance grid.

    The function reads ``exps``, ``grid``, ``knockout_cells``,
    ``knockout_vals`` and ``knockout_tfs`` from the enclosing scope; none of
    them is built in this block.  NOTE(review): presumably module-level
    globals, or the code that loads them (and uses ``reset``) is missing --
    confirm.  ``grid`` is modified in place: the knockout cells are zeroed.

    Figures produced
    ----------------
    * figure 1 -- one heatmap per experiment class (columns of ``grid``
      listed in ``exps[k]``) with the per-experiment mean overlaid; saved to
      ``daniel/figs/<method>_net<num>_heatmaps.tiff``.
    * figure 3 -- boxplots per experiment class plus the knockout cells;
      saved to ``daniel/figs/<subdir><method>_net<num>_boxplots.tiff`` and
      then cleared and reused for a normalised-histogram plot.
    * figure 4 -- log10 percentage of above-threshold values and the mean of
      above-threshold values per class.

    Parameters
    ----------
    num, method : only used in the plot title and output file names.
    reset, show_kos : accepted for compatibility; not read in this body.
    plot_kcs : mark the knockout cells on each heatmap.
    bp_means : boxplot per-experiment means instead of individual cells.
    bp_zeros : keep zero entries in the boxplot data.
    zero_ofs : offset added before taking logs; ``log(zero_ofs)`` is also the
        threshold above which a value counts as nonzero.
    bp_logs : log-transform the boxplot data.
    filter_rows_and_cols : only selects the output sub-directory here.

    Returns
    -------
    None
    """
    # Make and annotate the heatmap figure
    f = plt.figure(1, facecolor='w')
    f.clear()
    axdims = .9
    ax_box = array([.05, .05, axdims, axdims])
    sg_big_hm_annotations(f, ax_box)

    # Set up the sizes of each group axis in the heatmap figure
    kwts = float(sum([len(v) for v in exps.values()]))
    mwidth = .015
    msize = mwidth * kwts
    kw_total = kwts + (msize * (len(exps) - 1))
    ofs = 0

    allow_tf_kn = False
    if not allow_tf_kn:
        grid[zip(*knockout_cells)] = 0

    # Some more heatmap configuration: colour range from the 10th/90th
    # percentile of the strictly positive entries.
    positive_vals = grid[nonzero(greater(grid, 0))]
    saturation = [np.percentile(positive_vals, 10),
                  np.percentile(positive_vals, 90)]
    tf_srt = argsort(np.mean(grid, 1))
    all_bps = []
    expsums = [np.mean(grid.T[v, :], 1) for v in exps.values()]
    max_sum = np.max((list(it.chain(*expsums))))

    # For each experiment class, plot a heatmap and overlay per exp sums
    for k, v in exps.iteritems():
        # Axes positioning
        wid = len(v)
        ax_ofs = array([ofs / kw_total, 0, (wid) / kw_total, 1.])
        ax_box = array([.05, .05, 0., 0.])
        ax_ofs = (ax_ofs * axdims) + ax_box

        # Make heatmap axes.
        ax = f.add_axes(ax_ofs, frameon=False)
        sums = np.mean(grid.T[v, :], 1)
        exp_srt = argsort(sums)[::-1]
        hm.heatMap(grid.T[v[exp_srt], :][:, tf_srt],
                   axes=ax,
                   vmin=saturation[0],
                   vmax=saturation[1])

        # Make overlay axes.
        ax2 = f.add_axes(array(ax_ofs) + array([0, 0, 0, 0]),
                         frameon=True,
                         axisbg='none',
                         xticks=[], yticks=[])
        # Make the axes look the way I like em
        for a in ax2.spines.values():
            a.set_linewidth(2)
            a.set_alpha(.5)

        these_knockouts = nonzero([cell[1] in v for cell in knockout_cells])
        kc = knockout_cells[these_knockouts]

        # If plot kcs is selected, plot the cells corresponding to TF
        # deletion/OE
        if plot_kcs and len(kc) > 0:
            v_list = list(v)
            ax.scatter(*zip(*[(v_list.index(x[1]), x[0]) for x in kc]),
                       s=50, color='none', edgecolor='black', linewidth=3)

        ax2.plot(sums[exp_srt], linewidth=4, color='blue')

        if bp_means:
            bpelts = sums
        else:
            bpelts = grid.T[v, :].flatten()
        if not bp_zeros:
            bpelts = bpelts[nonzero(bpelts)]
        all_bps.append(bpelts)

        ax2.set_xlim([0, wid])
        ax2.set_ylim([0, max_sum])
        ax.set_xlim([0, wid])
        ax.set_ylim([0, shape(grid)[0]])
        ax2.set_xticks([])

        # Annotate each axis with its experiment-class name
        ax2.text(-2, 0, k,
                 va='bottom', ha='right', rotation=90,
                 color='black', size='x-large', family='serif')
        ofs += wid + msize

    # Make the boxplot figure
    f2 = plt.figure(3)
    plt.clf()
    if bp_means:
        bp_kos = array([mean(grid.T[g[0], :], 0)
                        for g in it.groupby(sorted(
                            [ko[1] for ko in knockout_cells]))])
    else:
        bp_kos = array(knockout_vals)
    if not bp_zeros:
        bp_kos = bp_kos[nonzero(bp_kos)]
    all_bps = all_bps + [bp_kos]

    ax3 = f2.add_subplot('111')
    if bp_logs:
        all_bps = [log(b + zero_ofs) for b in all_bps]
    # NOTE(review): the threshold is in log space even when bp_logs is
    # false; the collapsed source does not show whether this assignment was
    # meant to sit inside the ``if`` above -- confirm.
    bp_lzero = log(zero_ofs)

    boxplots = ax3.boxplot(list(all_bps), widths=.5)
    for p in boxplots.values():
        for e in p:
            e.set_linewidth(4)

    # Annotate the boxplot figure: one numbered line per box, derived from
    # the data instead of assuming exactly eight boxes.
    names = list(exps.keys()) + ['TF Knockout/OE']
    ann_str = ''.join('{0}: {1}\n'.format(i + 1, name)
                      for i, name in enumerate(names))
    ax3.annotate(ann_str, [0, 1], xycoords='axes fraction',
                 xytext=[10, -10], textcoords='offset pixels',
                 va='top', ha='left')

    title = ('Boxplot of significances per experiment type for {3} '
             'learning method, Net {4} \n'
             'Filtered out were {0} cells corresponding to {1} TFs Knocked '
             'out or OverExpressed. {2} of these cells have nonzero '
             'importance and are plotted at x=9, Showing Means: {5}, '
             'Showing zeros: {6}, Plotting logs {7}')
    ax3.set_title(title.format(len(knockout_cells),
                               len(knockout_tfs),
                               len(nonzero(knockout_vals)[0]),
                               method, num,
                               bp_means, bp_zeros, bp_logs))
    ax3.set_ylabel('significance')
    ax3.set_xlabel('experiment class')

    f.savefig(cfg.dataPath('daniel/figs/{0}_net{1}_heatmaps.tiff'.format(
        method, num)), format='tiff')

    # The previous and/or chain had no branch for bp_zeros together with
    # bp_logs (the default flags) and produced the literal prefix 'False'.
    subdir = _sig_grid_boxplot_subdir(filter_rows_and_cols,
                                      bp_zeros, bp_means, bp_logs)
    dataDir = cfg.dataPath('daniel/figs/{2}{0}_net{1}_boxplots.tiff'.format(
        method, num, subdir))
    print('saving {0}'.format(dataDir))
    out_dir = os.path.dirname(dataDir)
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
    if os.path.isfile(dataDir):
        os.remove(dataDir)
    f2.savefig(dataDir, format='tiff')

    # Values above the log(zero_ofs) threshold, per box; reused below.
    nonzero_bps = [bp[nonzero(greater(bp, bp_lzero))] for bp in all_bps]
    mean_xvals = [mean(nz) for nz in nonzero_bps]

    pdfs, xvals = zip(*[histogram(x, bins=50, range=[-15, 8], normed=False)
                        for x in all_bps])

    import compbio.utils.colors as colors
    c = colors.getct(len(pdfs))

    # Figure 3 was already saved above; it is cleared and reused here.
    f3 = plt.figure(3)
    f3.clear()
    sax = f3.add_subplot('111')
    seismic.seismic([array(x, float) / sum(x) for x in pdfs],
                    xax=xvals[0][:-1], stacked=False, colors=c,
                    xmarkpts=mean_xvals, ax=sax)

    f4 = plt.figure(4)
    f4.clear()
    ax = f4.add_subplot('121')
    ax.set_title('(log base 10) of Percentage Nonzero for Experiment Classes')
    percs = log10(array([100 * float(len(nz)) / len(bp)
                         for nz, bp in zip(nonzero_bps, all_bps)]))
    ax.plot(percs, linewidth=6)
    ax.set_yticks(percs)
    ax.set_yticklabels(['{1}\n{0}'.format('%2.2f' % (10 ** p), names[idx])
                        for idx, p in enumerate(percs)])

    ax2 = f4.add_subplot('122')
    ax2.set_title('Mean of Nonzero Experiments for Experiment Classes')
    means = array(mean_xvals)
    # x positions 1..N line up with the default boxplot positions.
    ax2.plot(arange(1, len(means) + 1), means, linewidth=6)
    ax2.boxplot(nonzero_bps, widths=.5)
    ax2.set_yticks(means)
    ax2.set_yticklabels(['{1}\n{0}'.format('%2.2f' % (p), names[idx])
                         for idx, p in enumerate(means)])