Exemplo n.º 1
0
def view(lat_range = [9.,15.], lon_range = [140., 150.] ):
  '''Display ocean-floor elevation data for a latitude/longitude window.

  The grid is read from 'random/ocean_topo.nc' (located through
  config.dataPath). Only the cells whose coordinate lies strictly
  between the two bounds of the matching range are kept, and the
  resulting sub-grid is drawn with plt.imshow.

  kwargs:
   lat_range  [low, high] compared against the file's 'y' variable
              (default [9., 15.])
   lon_range  [low, high] compared against the file's 'x' variable
              (default [140., 150.])
  '''
  topo = sio.netcdf_file(config.dataPath('random/ocean_topo.nc'))
  lons = topo.variables['x'].data
  lats = topo.variables['y'].data

  # Positions along each axis that fall strictly inside the window.
  lon_lo, lon_hi = lon_range[0], lon_range[1]
  lat_lo, lat_hi = lat_range[0], lat_range[1]
  cols = nonzero(logical_and(lons > lon_lo, lons < lon_hi))[0]
  rows = nonzero(logical_and(lats > lat_lo, lats < lat_hi))[0]

  palette = mycolors.blackbody()
  elevation = array(topo.variables['z'].data)

  # 'z' is indexed as [y, x]: pick the rows first, then the columns.
  window = elevation[rows,:][:,cols]

  # The row order is reversed before drawing.
  plt.imshow(window[::-1], cmap = palette, interpolation = 'nearest')
Exemplo n.º 2
0
def cluster_tissues(nx = 20,ny = 500, timepoint = -1,
                    step = 4,
                    sim = 'neg_dist', 
                    imshow_sims = False,
                    scatter_sims = False,
                    hist_sims = False,
                    do_cluster= True,
                    do_show = True, cstep = -1):
  '''Cluster nuclei by the values of the nx mRNAs with highest
  variance. Uses the medioids method with number of clusters set
  by exemplar self similarity as outlined in 6.874 and implemented
  at http://www.psi.toronto.edu/affinitypropagation

  Outline:
    1. Rank the mRNAs by their standard deviation at `timepoint` and
       keep the nx most variable ones as the feature set.
    2. Compute the similarities between nuclei and subsample ny evenly
       spaced nuclei as the clustering (training) set.
    3. Call cluster() on the training similarities, then read one
       exemplar index per training nucleus from the file
       'bdtnp/clustering/nuclei/idxs'.
    4. Assign every nucleus to its most similar exemplar and, when
       do_show is set, save a scatter plot colored by assignment.

  imaging:
  imshow_sims:  show the training similarity matrix as an image
  scatter_sims: scatter rows of the training similarity matrix against
                each other
  hist_sims:    plot a histogram of log10(1 + (max - similarity))
  do_show:      draw and save the cluster scatter plot

  numerics:
  nx:           number of genes to cluster upon
  ny:           number of cells in the clustering
  timepoint:    which time to use for cluster computation
  step:         stride used to subsample the nuclei that are plotted
  sim:          similarity method name forwarded to similarity()
  do_cluster:   currently unused
  cstep:        currently unused

  Returns nothing; the outputs are the figure (number 0) and the saved
  tiff file.
  '''
  mrnas = nio.getBDTNP()
  misc = nio.getBDTNP(misc = True)

  # Snapshot the values once so that every positional lookup below uses
  # the same ordering (and so that a non-indexable view is acceptable).
  mrna_vals = list(mrnas.values())

  #choose to look only at one timepoint
  stds = [std(m['vals'][:,timepoint]) for m in mrna_vals]
  vsort = argsort(stds)[::-1]
  xinds = vsort[:nx]

  #Choose the most variable factors and use them as the 
  #underlying variables from which to construct a similarity
  nuclei = array([mrna_vals[idx]['vals'][:,timepoint]
                  for idx in xinds]).T

  # Per-gene mean and standard deviation, handed to similarity().
  t = [ mean(nuclei, 0), std(nuclei,0)]
  # NOTE(review): this assignment replaces zeros with zeros, i.e. it is a
  # no-op. Presumably a non-zero value was intended to guard a division
  # inside similarity(); confirm before changing it.
  t[1][equal(t[1],0)] = 0
  sims = similarity(nuclei, transform = t, method = sim)

  # ny evenly spaced nuclei form the training subset.
  cluster_inds = array(floor(linspace(0,len(nuclei)-1, ny)), int)
  cluster_training = sims[cluster_inds,:][:,cluster_inds]

  f = plt.figure(0)

  if scatter_sims:
    ax = f.add_subplot(111)
    scatterx = [cluster_training[i] for i in range(ny) for j in range(ny)]
    scattery = [cluster_training[j] for i in range(ny) for j in range(ny)]
    ax.scatter(scatterx, scattery, s =3, alpha = .1)

  if imshow_sims:
    ax = f.add_subplot(111)
    cmap = mycolors.blackbody()
    ax.imshow(cluster_training, cmap = cmap, interpolation = 'nearest')

  if hist_sims:
    ax = f.add_subplot(111)
    # flatten() returns a copy, so the in-place edits below leave
    # cluster_training untouched.
    csf = cluster_training.flatten()
    csf -= max(csf)
    csf *= -1
    h = histogram(log10(1+csf), bins = 100)
    ax.plot(h[1][:-1],h[0])

  # The second argument is the score at percentile 0.2 of the training
  # similarities.
  cluster(cluster_training, ss.scoreatpercentile(cluster_training,.2) )

  # NOTE(review): the index file is presumably written by cluster();
  # confirm against its implementation.
  with open(cfg.dataPath('bdtnp/clustering/nuclei/idxs')) as fopen:
    c = [int(l.strip()) for l in fopen]
  c_training_exemplars = set(c)
  exemplar_inds = [cluster_inds[i] for i in c_training_exemplars]
  #I am being a bit lazy with subscripting here because I just assume
  #that the similarity is symmetric... I suppose I could let it be 
  #asymmetric if I liked

  exemplars = nuclei[exemplar_inds,:]
  all_sims = similarity(nuclei,  exemplars,
                        transform = t, 
                        transform_exemplars = True,
                        method = sim)
  # Each nucleus goes to the exemplar it is most similar to.
  assignments = np.argmax(all_sims,1)

  colors = array(mycolors.getct(len(c)))

  if do_show:
    # The trailing [-1:] restricts the loop to the last timepoint index.
    for tp in range(shape(mrna_vals[0]['vals'])[1])[-1:]:
      try:
        f.clear()
      except Exception:
        print('Weird 3d plotting error. Alas')
      nuclei = array([mrna_vals[idx]['vals'][:,tp]
                      for idx in xinds]).T
      all_sims = similarity(nuclei,  exemplars,
                            transform = t, transform_exemplars = True,
                            method = sim)
      assignments = np.argmax(all_sims,1)

      ax = f.add_subplot(111)
      # Plot every step-th nucleus using column 0 of the 'x' and 'z'
      # coordinate tables.
      xs = misc['x']['vals'][::step,0]
      zs = misc['z']['vals'][::step,0]
      ax.scatter(xs, zs,s= 50, color =colors[assignments[::step]])

      f.savefig(cfg.dataPath('figs/bdtnp/cluster_movie{0:02d}.tiff'.format(tp)), format = 'tiff')
Exemplo n.º 3
0
def heatMap(grid, 
            ann = [],
            xlabel = 'none', 
            ylabel = 'none',
            **kwargs):

  '''
  Draw grid as a heatmap and annotate a few of its points.

  For now, this is a primordial heatmap script.
  The optimal use case is as in the function heatmapGene from 
  compbio.projects.predict.py. In this case, we input a grid and
  a list of annotations that happen to annotate each and every
  grid point. In particular, the dictionary contains an xvalue 
  and a yvalue for each point as well as an entry, 'pkeys' that
  give the names of the elements being plotted on the x and the y.

  e.g: ann[0][0] = {'pvalue':0.,
                    'cvalue':0.,
                    'pkeys':['pvalue','cvalue']
                    }

  Of course, this assumes a standard form for annotations.

  args:
   grid     2d array; it is drawn transposed, so grid[i,j] appears at
            x = i, y = j
   ann      nested sequence with ann[i][j] the annotation dictionary
            of grid[i,j]
   xlabel   x axis label; 'none' means: use pkeys[0] of the first
            annotated point
   ylabel   y axis label; 'none' means: use pkeys[1] of the first
            annotated point

  kwargs:
   axes     axes to draw into; when missing, a new subplot is added
            to the figure selected by fig
   fig      figure number used when no axes are given (default 0)
   Every other keyword is forwarded to imshow.

  The first and the last annotation points and the grid extrema are
  circled and supply the axis ticks; the extrema additionally get a
  text annotation.

  Returns the axes that were drawn into.
  '''

  cmap = mycolors.blackbody()

  # 'axes' and 'fig' are consumed here. They are popped so that they
  # are not forwarded to imshow with the remaining keywords.
  ax = kwargs.pop('axes', None)
  fig_id = kwargs.pop('fig', 0)
  if not ax:
    figure = plt.figure(fig_id)
    ax = figure.add_subplot(111)
    
  ax.imshow(grid.T, cmap = cmap, interpolation = 'nearest',
            origin = 'lower', aspect = 'auto', **kwargs)

  xticks, xticklabels = [], []
  yticks, yticklabels = [], []

  # Locate the extrema once instead of once per grid cell.
  grid_shape = shape(grid)
  low_ij = unravel_index(argmin(grid), grid_shape)
  high_ij = unravel_index(argmax(grid), grid_shape)

  # shape(ann) builds an array from the nested list, so compute it once.
  ann_shape = shape(ann)
  last_ij = (len(ann) - 1, len(ann[0]) - 1) if len(ann) else None

  # Appearance shared by every text annotation.
  xytext = (30,10)
  textcoords = 'offset pixels'
  bbox = None #bbox=dict(boxstyle="round4", fc="none")
  arrowprops = dict(arrowstyle="-|>",
                    connectionstyle="arc3,rad=-0.2",
                    relpos=(0., 0.),
                    shrinkA = 0,
                    shrinkB = 10,
                    fc="none")

  for i in range(ann_shape[0]):
    for j in range(ann_shape[1]):
      is_edge = (i, j) == (0, 0) or (i, j) == last_ij
      is_low = (i, j) == low_ij
      is_high = (i, j) == high_ij

      # The random term has probability zero as written, so only the
      # two edge points and the extrema are ever selected.
      if not (random.random() < ( 0./product(ann_shape))
              or is_edge or is_low or is_high):
        continue

      d = ann[i][j]
      dkeys = d['pkeys']
      s = dictString(d)
      xy = [i,j]

      ax.scatter(xy[0],xy[1], 100, 
                 color = 'none',
                 edgecolor = 'black')
      if is_low:
        s += '\nLow Point: {0}'.format(strFormat(grid[i,j]))
      elif is_high:
        s += '\nHigh Point: {0}'.format(strFormat(grid[i,j]))

      # The first annotated point names the axes unless the caller did.
      if xlabel == 'none': xlabel = dkeys[0]
      if ylabel == 'none': ylabel = dkeys[1]

      # Edge points are circled and ticked but get no text.
      if not is_edge:
        ax.annotate(s,xy,xytext = xytext,
                    textcoords = textcoords,
                    bbox = bbox,
                    arrowprops = arrowprops)

      xticks.append(i)
      xticklabels.append(strFormat(d[dkeys[0]]))
      yticks.append(j)
      yticklabels.append(strFormat(d[dkeys[1]]))
        
  ax.set_xticks(xticks)
  ax.set_xticklabels(xticklabels)
  ax.set_yticks(yticks)
  ax.set_yticklabels(yticklabels)
  if xlabel != 'none': ax.set_xlabel(xlabel)
  if ylabel != 'none': ax.set_ylabel(ylabel)

  return ax