def view(lat_range = [9.,15.], lon_range = [140., 150.] ):
    '''View GIS elevation data for the ocean floor.

    Reads the 'x', 'y' and 'z' variables from random/ocean_topo.nc, keeps
    the grid cells whose 'x' value lies strictly inside lon_range and whose
    'y' value lies strictly inside lat_range, and draws the selection with
    plt.imshow using the blackbody colormap.

    kwargs:
      lat_range   [low, high] bounds applied to 'y'; default [9., 15.]
      lon_range   [low, high] bounds applied to 'x'; default [140., 150.]

    The default lists are only read here, never modified.
    '''
    nc = sio.netcdf_file(config.dataPath('random/ocean_topo.nc'))
    try:
        # Copy everything we need out of the file so that nothing refers
        # to its underlying buffer once the handle is closed.
        xvals = array(nc.variables['x'].data)
        yvals = array(nc.variables['y'].data)
        arr = array(nc.variables['z'].data)
    finally:
        nc.close()

    # Indices of the columns (x) and rows (y) that fall inside the window;
    # both bounds are exclusive.
    col_idxs = nonzero(logical_and(greater(xvals, lon_range[0]),
                                   less(xvals, lon_range[1])))[0]
    row_idxs = nonzero(logical_and(greater(yvals, lat_range[0]),
                                   less(yvals, lat_range[1])))[0]

    cmap = mycolors.blackbody()
    subimg = arr[:, col_idxs][row_idxs, :]

    # Rows are reversed before display, as in the original implementation.
    plt.imshow(subimg[::-1],
               cmap = cmap,
               interpolation = 'nearest')
def cluster_tissues(nx = 20,ny = 500, timepoint = -1, step = 4,
                    sim = 'neg_dist',
                    imshow_sims = False,
                    scatter_sims = False,
                    hist_sims = False,
                    do_cluster= True, do_show = True, cstep = -1):
    '''Cluster ny nuclei by the values of the nx mRNAs with highest variance.

    Uses the medoids method with number of clusters set by exemplar
    self similarity as outlined in 6.874 and implemented at
    http://www.psi.toronto.edu/affinitypropagation

    imaging:
      imshow_sims:  draw the training similarity matrix as an image
      scatter_sims: scatter rows of the training similarity matrix
      hist_sims:    plot a log10 histogram of the training similarities
      do_show:      draw the cluster assignments and save the figure as
                    figs/bdtnp/cluster_movie<tp>.tiff

    numerics:
      nx:        number of genes to cluster upon
      ny:        number of cells used in the clustering
      timepoint: which time to use for cluster computation
      step:      stride over nuclei when showing results
      sim:       similarity method name passed on to similarity()

    do_cluster and cstep are accepted but currently unused: cluster() is
    always called.

    So far a distance based similarity ('neg_dist', the default) is used.
    '''
    mrnas = nio.getBDTNP()
    misc = nio.getBDTNP(misc = True)

    # Materialise once so that positional indexing works whether values()
    # returns a list or a view.
    mrna_vals = list(mrnas.values())

    # Choose to look only at one timepoint.
    stds = [std(m['vals'][:,timepoint]) for m in mrna_vals]
    vsort = argsort(stds)[::-1]
    xinds = vsort[:nx]

    # Choose the most variable factors and use them as the
    # underlying variables from which to construct a similarity.
    nuclei = array([mrna_vals[idx]['vals'][:,timepoint] for idx in xinds]).T
    t = [mean(nuclei, 0), std(nuclei, 0)]
    # NOTE(review): this assigns 0 to entries that are already 0, so it is
    # a no-op. It may have been meant to guard a division by the std inside
    # similarity(); left untouched until that helper is checked.
    t[1][equal(t[1],0)] = 0
    sims = similarity(nuclei, transform = t, method = sim)

    # ny evenly spaced nuclei form the training subset for clustering.
    cluster_inds = array(floor(linspace(0,len(nuclei)-1, ny)), int)
    cluster_training = sims[cluster_inds,:][:,cluster_inds]

    f = plt.figure(0)
    # The diagnostic plots below previously referred to an undefined name
    # (cluster_sims); they operate on the training similarity matrix.
    if scatter_sims:
        ax = f.add_subplot(111)
        scatterx = [cluster_training[i]
                    for i in range(ny) for j in range(ny)]
        scattery = [cluster_training[j]
                    for i in range(ny) for j in range(ny)]
        ax.scatter(scatterx, scattery, s =3, alpha = .1)

    if imshow_sims:
        ax = f.add_subplot(111)
        cmap = mycolors.blackbody()
        ax.imshow(cluster_training,
                  cmap = cmap,
                  interpolation = 'nearest')

    if hist_sims:
        ax = f.add_subplot(111)
        # flatten() returns a copy, so the in-place ops below do not
        # touch cluster_training.
        csf = cluster_training.flatten()
        csf -= max(csf)
        csf *= -1
        h = histogram(log10(1+csf), bins = 100)
        ax.plot(h[1][:-1],h[0])

    # The second argument is the 0.2th percentile of the training
    # similarities. Presumably cluster() writes the exemplar indices read
    # back below -- TODO confirm against cluster().
    cluster(cluster_training,
            ss.scoreatpercentile(cluster_training,.2))

    with open(cfg.dataPath('bdtnp/clustering/nuclei/idxs')) as fopen:
        c = [int(l.strip()) for l in fopen]
    c_training_exemplars = set(c)

    exemplar_inds = [cluster_inds[i] for i in c_training_exemplars]

    # I am being a bit lazy with subscripting here because I just assume
    # that the similarity is symmetric... I suppose I could let it be
    # asymmetric if I liked.
    exemplars = nuclei[exemplar_inds,:]
    all_sims = similarity(nuclei, exemplars,
                          transform = t,
                          transform_exemplars = True,
                          method = sim)
    assignments = np.argmax(all_sims,1)

    colors = array(mycolors.getct(len(c)))

    if do_show:
        n_times = shape(mrna_vals[0]['vals'])[1]
        # Only the last timepoint is rendered.
        for tp in range(n_times)[-1:]:
            try:
                f.clear()
            except Exception:
                print('Weird 3d plotting error. Alas')

            # Re-assign every nucleus to its best exemplar using the
            # expression values at timepoint tp.
            nuclei = array([mrna_vals[idx]['vals'][:,tp]
                            for idx in xinds]).T
            all_sims = similarity(nuclei, exemplars,
                                  transform = t,
                                  transform_exemplars = True,
                                  method = sim)
            assignments = np.argmax(all_sims,1)

            ax = f.add_subplot(111)
            xs = misc['x']['vals'][::step,0]
            zs = misc['z']['vals'][::step,0]

            ax.scatter(xs, zs, s= 50, color =colors[assignments[::step]])
            #ax.set_title('''Virtual embryo cell (n={2}) clusters
            #from similarities derived from {0} genes.
            #Clusters derived at T = {1}, shown at T = {3}.'''\
            #                 .format(nx,timepoint, len(xs),tp))

            f.savefig(cfg.dataPath('figs/bdtnp/cluster_movie{0:02d}.tiff'.format(tp)), format = 'tiff')
def heatMap(grid, ann = [], xlabel = 'none', ylabel = 'none', **kwargs):
    '''
    For now, this is a primordial heatmap script. The optimal use case is
    as in the function heatmapGene from compbio.projects.predict.py.

    In this case, we input a grid and a list of annotations that happen to
    annotate each and every grid point. In particular, each annotation
    dictionary contains an xvalue and a yvalue for its point as well as an
    entry, 'pkeys', that gives the names of the elements being plotted on
    the x and the y.

    e.g:
      ann[0][0] = {'pvalue':0.,
                   'cvalue':0.,
                   'pkeys':['pvalue','cvalue'] }

    Of course, this assumes a standard form for annotations. I guess
    it's not so bad for now.

    kwargs:
      axes:  axes to draw into; when omitted a new subplot is created
      fig:   figure number used when no axes is given (default 0)
      Any remaining keyword arguments are forwarded to ax.imshow.

    xlabel, ylabel: axis labels; the sentinel 'none' means "take the label
      from the 'pkeys' of the first marked annotation", and no label is
      set if nothing was marked.

    Marked cells are the two corner cells (0,0) and (last,last) plus the
    lowest and highest grid cells. Returns the axes that was drawn into.
    '''
    cmap = mycolors.blackbody()

    # Pop (rather than get) so that these two routing options are not
    # forwarded to imshow, which does not accept them as image properties.
    ax = kwargs.pop('axes', None)
    fig_id = kwargs.pop('fig', 0)
    if ax is None:
        figure = plt.figure(fig_id)
        ax = figure.add_subplot(111)

    # The grid is transposed so that the first grid index runs along x.
    ax.imshow(grid.T,
              cmap = cmap,
              interpolation = 'nearest',
              origin = 'lower',
              aspect = 'auto',
              **kwargs)

    xticks, xticklabels = [], []
    yticks, yticklabels = [], []

    # Locate the extreme cells once instead of once per visited cell.
    grid_shape = shape(grid)
    low_ij = tuple(unravel_index(argmin(grid), grid_shape))
    high_ij = tuple(unravel_index(argmax(grid), grid_shape))
    ann_shape = shape(ann)

    def edgept(i, j):
        # True for the first and the last annotated cell (the two corners).
        return (i == 0 and j == 0) \
            or (i == len(ann) - 1 and j == len(ann[0]) - 1)

    # Settings shared by every annotation.
    xytext = (30,10)
    textcoords = 'offset pixels'
    bbox = None  #bbox=dict(boxstyle="round4", fc="none")
    arrowprops = dict(arrowstyle="-|>",
                      connectionstyle="arc3,rad=-0.2",
                      relpos=(0., 0.),
                      shrinkA = 0,
                      shrinkB = 10,
                      fc="none")

    for i in range(ann_shape[0]):
        for j in range(ann_shape[1]):
            islow = (i, j) == low_ij
            ishigh = (i, j) == high_ij

            # The random term has probability 0 of firing; it is kept as
            # the knob for annotating a random fraction of the cells.
            if not (random.random() < (0./product(ann_shape))
                    or edgept(i, j) or islow or ishigh):
                continue

            d = ann[i][j]
            dkeys = d['pkeys']
            s = dictString(d)
            xy = [i, j]

            ax.scatter(xy[0], xy[1], 100,
                       color = 'none', edgecolor = 'black')

            if islow:
                s += '\nLow Point: {0}'.format(strFormat(grid[i,j]))
            elif ishigh:
                s += '\nHigh Point: {0}'.format(strFormat(grid[i,j]))

            # First marked cell supplies the axis labels if none were given.
            if xlabel == 'none': xlabel = dkeys[0]
            if ylabel == 'none': ylabel = dkeys[1]

            # Corner cells get a marker and ticks but no text annotation.
            if not edgept(i, j):
                ax.annotate(s, xy, xytext = xytext,
                            textcoords = textcoords,
                            bbox = bbox,
                            arrowprops = arrowprops)

            # NOTE(review): ticks are recorded for every marked cell,
            # corners included -- confirm this matches the intended nesting.
            xticks.append(i)
            xticklabels.append(strFormat(d[dkeys[0]]))
            yticks.append(j)
            yticklabels.append(strFormat(d[dkeys[1]]))

    ax.set_xticks(xticks)
    ax.set_xticklabels(xticklabels)
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticklabels)

    if xlabel != 'none': ax.set_xlabel(xlabel)
    if ylabel != 'none': ax.set_ylabel(ylabel)

    return ax