def export_weight_ds(agg, data_dir='/auto/tdrive/mschachter/data', decomp='full_psds'):
    """Flatten decoder weights into one row per (electrode, frequency, property) and save them.

    Rows of ``agg.df`` whose ``decomp`` column equals *decomp* are grouped by
    (bird, block, segment, hemi).  For each group the decoder weights and
    decoder performances stored on *agg* are joined with the electrode
    metadata read from ``<data_dir>/aggregate/electrode_data+dist.csv``.
    The table is written to
    ``<data_dir>/aggregate/decoder_weights_for_glm.csv``; nothing is returned.

    :param agg: must be an AcousticEncoderDecoderAggregator (asserted).
    :param data_dir: directory that contains the ``aggregate`` sub-directory.
    :param decomp: value of ``agg.df.decomp`` selecting the rows to export.
    """
    freqs, lags = get_freqs_and_lags()
    assert isinstance(agg, AcousticEncoderDecoderAggregator)

    aggregate_dir = os.path.join(data_dir, 'aggregate')
    edata = pd.read_csv(os.path.join(aggregate_dir, 'electrode_data+dist.csv'))

    data = {'bird': [], 'block': [], 'segment': [], 'hemi': [], 'electrode': [],
            'reg': [], 'dm': [], 'dl': [], 'aprop': [], 'r2': [], 'f': [], 'w': []}

    aprops = USED_ACOUSTIC_PROPS
    nprops = len(aprops)
    nfreqs = len(freqs)

    selected = agg.df[agg.df.decomp == decomp]
    for (bird, block, segment, hemi), site_df in selected.groupby(['bird', 'block', 'segment', 'hemi']):
        # exactly one aggregator row is expected per recording site
        assert len(site_df) == 1
        wkey = site_df.wkey.values[0]
        index2electrode = agg.index2electrode[wkey]

        dweights = agg.decoder_weights[wkey]
        expected_shape = (len(index2electrode), nfreqs, nprops)
        assert dweights.shape == expected_shape, "dweights.shape=%s" % str(dweights.shape)

        dperfs = agg.decoder_perfs[wkey]
        assert len(dperfs) == nprops

        for eidx, electrode in enumerate(index2electrode):
            # locate the single metadata row describing this electrode
            match = (edata.bird == bird) & (edata.block == block) & \
                    (edata.hemisphere == hemi) & (edata.electrode == electrode)
            assert match.sum() == 1

            reg = clean_region(edata.region[match].values[0])
            dist_l2a = edata.dist_l2a[match].values[0]
            dist_midline = edata.dist_midline[match].values[0]

            # this bird's L2A distance is scaled by 4 (reason not visible in this function)
            if bird == 'GreBlu9508M':
                dist_l2a *= 4

            for fidx, f in enumerate(freqs):
                freq = int(f)
                for pidx, aprop in enumerate(aprops):
                    row = {'bird': bird, 'block': block, 'segment': segment, 'hemi': hemi,
                           'electrode': electrode, 'reg': reg,
                           'dm': dist_midline, 'dl': dist_l2a,
                           'aprop': aprop, 'r2': dperfs[pidx],
                           'f': freq, 'w': dweights[eidx, fidx, pidx]}
                    for column, value in row.items():
                        data[column].append(value)

    df = pd.DataFrame(data)
    df.to_csv(os.path.join(aggregate_dir, 'decoder_weights_for_glm.csv'), header=True, index=False)
def get_encoder_perf_data_for_psd(agg, ein=None, data_dir='/auto/tdrive/mschachter/data'):
    """Build and save a long-format table of PSD encoder performance.

    Selects the rows of ``agg.df`` with ``encoder_output == 'psd'`` and
    ``decomp == 'full'`` whose ``encoder_input`` is *ein* (or ``'rate'`` /
    ``'both'`` when *ein* is None).  For each selected ``wkey`` one row is
    emitted per (electrode, frequency) holding ``agg.encoder_perfs[wkey][k, j]``
    plus the electrode metadata read from
    ``<data_dir>/aggregate/electrode_data+dist.csv``.

    The table is written to ``<data_dir>/aggregate/lfp_encoder_perfs_<ein>.csv``
    when *ein* is given, otherwise to ``<data_dir>/aggregate/lfp_encoder_perfs.csv``.

    :param agg: aggregator exposing ``df``, ``encoder_perfs``, ``index2electrode`` and ``freqs``.
    :param ein: encoder input type to select, or None for both 'rate' and 'both'.
    :param data_dir: directory containing the ``aggregate`` sub-directory.  It is
        an explicit parameter so input and output locations always agree; the
        default reproduces the previously hard-coded output location.
    :return: the DataFrame that was written.
    """
    if ein is None:
        i = (agg.df.encoder_input == 'rate') | (agg.df.encoder_input == 'both')
    else:
        i = (agg.df.encoder_input == ein)
    i &= (agg.df.encoder_output == 'psd') & (agg.df.decomp == 'full')

    aggregate_dir = os.path.join(data_dir, 'aggregate')
    edata = pd.read_csv(os.path.join(aggregate_dir, 'electrode_data+dist.csv'))

    pdata = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(),
             'electrode': list(), 'region': list(), 'f': list(), 'r2': list(),
             'dist_l2a': list(), 'dist_midline': list(), 'ein': list()}

    for wkey in agg.df.wkey[i].values:
        # the weight key encodes the site and the encoder configuration, '_'-separated
        bird, block, segment, hemi, ein2, eout2, decomp = wkey.split('_')

        eperfs = agg.encoder_perfs[wkey]
        index2electrode = agg.index2electrode[wkey]

        for k, e in enumerate(index2electrode):
            ei = (edata.bird == bird) & (edata.block == block) & \
                 (edata.hemisphere == hemi) & (edata.electrode == e)
            assert ei.sum() == 1

            reg = clean_region(edata.region[ei].values[0])
            dist_l2a = edata.dist_l2a[ei].values[0]
            dist_midline = edata.dist_midline[ei].values[0]

            # this bird's L2A distance is scaled by 4 (reason not visible in this function)
            if bird == 'GreBlu9508M':
                dist_l2a *= 4

            for j, f in enumerate(agg.freqs):
                pdata['bird'].append(bird)
                pdata['block'].append(block)
                pdata['segment'].append(segment)
                pdata['hemi'].append(hemi)
                pdata['electrode'].append(e)
                pdata['ein'].append(ein2)
                pdata['region'].append(reg)
                pdata['dist_l2a'].append(dist_l2a)
                pdata['dist_midline'].append(dist_midline)
                pdata['f'].append(int(f))
                pdata['r2'].append(eperfs[k, j])

    df = pd.DataFrame(pdata)

    if ein is not None:
        out_name = 'lfp_encoder_perfs_%s.csv' % ein
    else:
        out_name = 'lfp_encoder_perfs.csv'
    df.to_csv(os.path.join(aggregate_dir, out_name), index=False, header=True)

    return df
def export_ds(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Join single-electrode decoder R2 values with electrode metadata and save them.

    Each (bird, block, segment, hemi, electrode, aprop) group of ``agg.df``
    must hold exactly one row.  Its ``r2`` is paired with the cleaned region
    and the two distance columns from
    ``<data_dir>/aggregate/electrode_data+dist.csv`` (rows for bird
    'BlaBro09xxF' are dropped from that table first).

    The complete table is written to
    ``<data_dir>/aggregate/single_electrode_decoder.csv``.

    :param agg: aggregator exposing a ``df`` DataFrame with the grouping columns and ``r2``.
    :param data_dir: directory containing the ``aggregate`` sub-directory.
    :return: the rows whose ``dist_l2a`` and ``dist_midline`` are both non-NaN
        (note that the CSV itself contains every row).
    """
    data = {'bird': [], 'block': [], 'segment': [], 'hemi': [], 'electrode': [],
            'aprop': [], 'region': [], 'dist_midline': [], 'dist_l2a': [], 'r2': []}

    aggregate_dir = os.path.join(data_dir, 'aggregate')
    edata = pd.read_csv(os.path.join(aggregate_dir, 'electrode_data+dist.csv'))
    edata = edata[edata.bird != 'BlaBro09xxF']

    group_cols = ['bird', 'block', 'segment', 'hemi', 'electrode', 'aprop']
    for (bird, block, segment, hemi, electrode, aprop), rows in agg.df.groupby(group_cols):
        assert len(rows) == 1

        # find the one metadata row for this electrode
        match = (edata.bird == bird) & (edata.block == block) & \
                (edata.hemisphere == hemi) & (edata.electrode == electrode)
        assert match.sum() == 1

        record = {
            'bird': bird,
            'block': block,
            'segment': segment,
            'hemi': hemi,
            'electrode': electrode,
            'aprop': aprop,
            'region': clean_region(edata.region[match].values[0]),
            'dist_midline': edata.dist_midline[match].values[0],
            'dist_l2a': edata.dist_l2a[match].values[0],
            'r2': rows.r2.values[0],
        }
        for column, value in record.items():
            data[column].append(value)

    df = pd.DataFrame(data)

    # mask is computed before writing, but only applied to the returned frame
    missing_coords = np.isnan(df.dist_l2a) | np.isnan(df.dist_midline)
    df.to_csv(os.path.join(aggregate_dir, 'single_electrode_decoder.csv'), header=True, index=False)

    return df[~missing_coords]
def stats(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Pick the lowest-error hyperparameters per site, export them and plot linear vs. RNN CC.

    For each (bird, block, segment, hemi) group of ``agg.df`` the
    (lambda1, lambda2, n_unit) combination with the smallest ``err`` is
    selected.  The rows for that combination (exactly 16 are asserted) are
    exported one per electrode together with the cleaned region read from
    ``<data_dir>/aggregate/electrode_data+dist.csv``.

    The table is written to ``<data_dir>/aggregate/rnn_best.csv`` so that the
    output follows *data_dir* just like the input does (with the default
    *data_dir* this is the same path as before).  A scatter plot of
    ``linear_cc`` against ``cc`` is then shown.

    :param agg: aggregator exposing a ``df`` DataFrame with the columns used below.
    :param data_dir: directory containing the ``aggregate`` sub-directory.
    """
    data = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(),
            'electrode': list(), 'linear_cc': list(), 'cc': list(), 'err': list(),
            'lambda1': list(), 'lambda2': list(), 'n_unit': list(),
            'region': list(), 'md5': list()}

    aggregate_dir = os.path.join(data_dir, 'aggregate')
    edata = pd.read_csv(os.path.join(aggregate_dir, 'electrode_data+dist.csv'))

    g = agg.df.groupby(['bird', 'block', 'segment', 'hemi'])
    for (bird, block, segment, hemi), gdf in g:

        # collect the error of every hyperparameter combination tried for this site
        perfs = list()
        gg = gdf.groupby(['lambda1', 'lambda2', 'n_unit'])
        for (lambda1, lambda2, n_unit), ggdf in gg:
            err = ggdf.err.values[0]
            perfs.append({'err': err, 'lambda1': lambda1, 'lambda2': lambda2, 'n_unit': n_unit})

        # the combination with the lowest error comes first after sorting
        perfs.sort(key=operator.itemgetter('err'))
        best_lambda1 = perfs[0]['lambda1']
        best_lambda2 = perfs[0]['lambda2']
        best_n_unit = perfs[0]['n_unit']
        best_err = perfs[0]['err']

        print('err=%0.3f, lambda1=%0.3f, lambda2=%0.3f, n_unit=%d' %
              (best_err, best_lambda1, best_lambda2, best_n_unit))

        i = (gdf.lambda1 == best_lambda1) & (gdf.lambda2 == best_lambda2) & (gdf.n_unit == best_n_unit)
        assert i.sum() == 16, 'i.sum()=%d' % i.sum()

        for e in gdf[i].electrode.unique():
            ii = i & (gdf.electrode == e)
            assert ii.sum() == 1, 'ii.sum()=%d' % ii.sum()

            iii = (edata.bird == bird) & (edata.block == block) & \
                  (edata.hemisphere == hemi) & (edata.electrode == e)
            assert iii.sum() == 1, 'iii.sum()=%d' % iii.sum()
            reg = clean_region(edata[iii].region.values[0])

            data['bird'].append(bird)
            data['block'].append(block)
            data['segment'].append(segment)
            data['hemi'].append(hemi)
            data['lambda1'].append(best_lambda1)
            data['lambda2'].append(best_lambda2)
            data['n_unit'].append(best_n_unit)
            data['err'].append(best_err)
            data['electrode'].append(e)
            data['linear_cc'].append(gdf[ii].linear_cc.values[0])
            data['cc'].append(gdf[ii].cc.values[0])
            data['region'].append(reg)
            data['md5'].append(gdf[ii].md5.values[0])

    df = pd.DataFrame(data)
    df.to_csv(os.path.join(aggregate_dir, 'rnn_best.csv'), header=True, index=False)

    fig = plt.figure(figsize=(12, 10), facecolor='w')
    # identity line for reference
    x = np.linspace(0, 1, 20)
    plt.plot(x, x, 'k-')
    plt.plot(df.linear_cc, df.cc, 'go', alpha=0.7, markersize=12)
    plt.xlabel('Linear CC')
    plt.ylabel('RNN CC')
    plt.xlim(0, 0.8)
    plt.ylim(0, 0.8)

    # saving is currently disabled; the file name is kept for when it is re-enabled
    fname = os.path.join(get_this_dir(), 'linear_vs_rnn_cc.svg')
    # plt.savefig(fname, facecolor=fig.get_facecolor(), edgecolor='none')

    plt.show()
def get_encoder_weight_data_for_psd(agg, include_sync=True, write_to_file=True,
                                    data_dir='/auto/tdrive/mschachter/data'):
    """Build a long-format table of PSD encoder weights (spike rate and synchrony).

    For every ``wkey`` of ``agg.df`` with ``encoder_input == 'both'``,
    ``encoder_output == 'psd'`` and ``decomp == 'full'``, and for every
    (electrode, frequency), the weight matrix ``W = agg.encoder_weights[wkey][k, j]``
    is asserted to have shape ``(ncells + 1, ncells)`` and unpacked as:

    * ``W[0, n]``       -> one 'rate' row per cell ``n``
    * ``W[n1 + 1, n2]`` -> one 'sync' row per cell pair with ``n2 < n1``
      (only when *include_sync* is true)

    Columns that do not apply to a row type are filled with ``-1``
    (``cells_same_electrode`` is ``0`` on rate rows).

    :param agg: aggregator exposing ``df``, ``encoder_perfs``, ``encoder_weights``,
        ``index2electrode``, ``index2cell``, ``cell_index2electrode`` and ``freqs``.
    :param include_sync: when False only the 'rate' rows are produced.
    :param write_to_file: when True the table is written to
        ``<data_dir>/aggregate/lfp_encoder_weights.csv``.
    :param data_dir: directory containing the ``aggregate`` sub-directory.  It is
        an explicit parameter so input and output locations always agree; the
        default reproduces the previously hard-coded output location.
    :return: the DataFrame of weights.
    """
    aggregate_dir = os.path.join(data_dir, 'aggregate')
    edata = pd.read_csv(os.path.join(aggregate_dir, 'electrode_data+dist.csv'))
    cdata = pd.read_csv(os.path.join(aggregate_dir, 'cell_data.csv'))

    e2e_dists = get_e2e_dists()

    # put cell data into a lookup table keyed by (bird, block, segment, hemi, cell)
    print('Creating cell lookup table')
    cell_data = dict()
    i = cdata.cell1 == cdata.cell2
    g = cdata[i].groupby(['bird', 'block', 'segment', 'hemi', 'cell1'])
    for (bird, block, segment, hemi, ci), gdf in g:
        assert len(gdf) == 1

        # get the electrode and cell indices corresponding to this site
        wkey = '%s_%s_%s_%s_%s_%s_full' % (bird, block, segment, hemi, 'both', 'psd')
        index2electrode = agg.index2electrode[wkey]
        cell_index2electrode = agg.cell_index2electrode[wkey]

        # get cell data
        rate = gdf.rate.values[0]
        # NOTE(review): rate_std is filled from the ``rate`` column, so the exported
        # 'rate_std' equals 'rate_mean'. This looks unintended -- confirm which
        # column of cell_data.csv holds the standard deviation before changing it.
        rate_std = gdf.rate.values[0]
        cell_electrode = cell_index2electrode[ci]

        # get the distance from this cell's electrode to every electrode of the site
        e2e = e2e_dists[(bird, block, hemi)]
        edist = dict()
        for e in index2electrode:
            edist[e] = e2e[(cell_electrode, e)]

        cell_data[(bird, block, segment, hemi, ci)] = (rate, rate_std, edist)

    print('Creating dataset....')

    # create the dataset
    wdata = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(),
             'electrode': list(), 'region': list(), 'f': list(), 'w': list(), 'r2': list(),
             'dist_l2a': list(), 'dist_midline': list(), 'wtype': list(), 'cell_index': list(),
             'rate_mean': list(), 'rate_std': list(), 'sync_mean': list(), 'sync_std': list(),
             'dist_from_electrode': list(), 'dist_cell2cell': list(),
             'same_electrode': list(), 'cells_same_electrode': list(),
             }

    def _append_row(**fields):
        # append one value to every column so that all columns stay the same length
        for column, value in fields.items():
            wdata[column].append(value)

    i = (agg.df.encoder_input == 'both') & (agg.df.encoder_output == 'psd') & (agg.df.decomp == 'full')
    for wkey in agg.df.wkey[i].values:
        bird, block, segment, hemi, ein2, eout2, decomp = wkey.split('_')

        eperfs = agg.encoder_perfs[wkey]
        eweights = agg.encoder_weights[wkey]
        index2electrode = agg.index2electrode[wkey]
        index2cell = agg.index2cell[wkey]
        cell_index2electrode = agg.cell_index2electrode[wkey]
        ncells = len(index2cell)

        for k, e in enumerate(index2electrode):
            ei = (edata.bird == bird) & (edata.block == block) & \
                 (edata.hemisphere == hemi) & (edata.electrode == e)
            assert ei.sum() == 1

            reg = clean_region(edata.region[ei].values[0])
            dist_l2a = edata.dist_l2a[ei].values[0]
            dist_midline = edata.dist_midline[ei].values[0]

            # this bird's L2A distance is scaled by 4 (reason not visible in this function)
            if bird == 'GreBlu9508M':
                dist_l2a *= 4

            for j, f in enumerate(agg.freqs):
                r2 = eperfs[k, j]
                W = eweights[k, j, :, :]
                assert W.shape == (ncells + 1, ncells)

                # values shared by every row of this (electrode, frequency)
                common = {'bird': bird, 'block': block, 'segment': segment, 'hemi': hemi,
                          'electrode': e, 'region': reg, 'f': int(f), 'r2': r2,
                          'dist_l2a': dist_l2a, 'dist_midline': dist_midline}

                # get the spike rate weights (first row of W)
                for n, ci in enumerate(index2cell):
                    rate, rate_std, edist = cell_data[(bird, block, segment, hemi, ci)]
                    cell_electrode = cell_index2electrode[ci]

                    _append_row(w=W[0, n],
                                wtype='rate',
                                rate_mean=rate,
                                rate_std=rate_std,
                                sync_mean=-1,
                                sync_std=-1,
                                dist_from_electrode=edist[e],
                                dist_cell2cell=-1,
                                cell_index=ci,
                                same_electrode=int(e == cell_electrode),
                                cells_same_electrode=0,
                                **common)

                if not include_sync:
                    continue

                # get the synchrony weights (rows 1.. of W, lower triangle of cell pairs)
                for n1, ci1 in enumerate(index2cell):
                    _rate1, _rate_std1, edist1 = cell_data[(bird, block, segment, hemi, ci1)]
                    e1 = cell_index2electrode[ci1]

                    for n2 in range(n1):
                        ci2 = index2cell[n2]
                        _rate2, _rate_std2, edist2 = cell_data[(bird, block, segment, hemi, ci2)]
                        e2 = cell_index2electrode[ci2]

                        cells_same_electrode = int(e1 == e2)
                        same_electrode = int(e1 == e2 and e1 == e)
                        dist_cell2cell = edist1[e2]
                        avg_dist_from_electrode = (edist1[e] + edist2[e]) / 2.

                        _append_row(w=W[n1 + 1, n2],
                                    wtype='sync',
                                    rate_mean=-1,
                                    rate_std=-1,
                                    sync_mean=-1,
                                    sync_std=-1,
                                    dist_from_electrode=avg_dist_from_electrode,
                                    dist_cell2cell=dist_cell2cell,
                                    cell_index=-1,
                                    same_electrode=same_electrode,
                                    cells_same_electrode=cells_same_electrode,
                                    **common)

    wdf = pd.DataFrame(wdata)
    if write_to_file:
        wdf.to_csv(os.path.join(aggregate_dir, 'lfp_encoder_weights.csv'), index=False)

    return wdf
def plot_maps(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Plot single-electrode decoder R2 on anatomical coordinates, one panel per property.

    One point is drawn per electrode at (distance to midline, distance to L2A),
    coloured by decoder R2, for each property in ``APROPS_TO_SHOW``.  Rows
    with NaN coordinates, NaN R2 or non-positive R2 are dropped.  Subplot 5 is
    then drawn by ``plot_r2_region_prop`` and the figure is saved as
    ``single_electrode_decoder_r2.svg`` in the directory returned by
    ``get_this_dir()`` before being shown.

    :param agg: aggregator exposing a ``df`` DataFrame with one row per
        (bird, block, segment, hemi, electrode, aprop) and an ``r2`` column.
    :param data_dir: directory containing the ``aggregate`` sub-directory.
    """
    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data+dist.csv'))

    data = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(), 'electrode': list(),
            'reg': list(), 'dm': list(), 'dl': list(), 'aprop': list(), 'r2': list()}

    df = agg.df

    # encoder performance maps
    aprops_to_show = APROPS_TO_SHOW

    # build a dataset that makes it easy to plot single decoder performance
    g = df.groupby(['bird', 'block', 'segment', 'hemi', 'electrode', 'aprop'])
    for (bird, block, segment, hemi, electrode, aprop), gdf in g:
        assert len(gdf) == 1

        ei = (edata.bird == bird) & (edata.block == block) & \
             (edata.hemisphere == hemi) & (edata.electrode == electrode)
        assert ei.sum() == 1

        reg = clean_region(edata.region[ei].values[0])
        dist_l2a = edata.dist_l2a[ei].values[0]
        dist_midline = edata.dist_midline[ei].values[0]

        # this bird's L2A distance is scaled by 4 (reason not visible in this function)
        if bird == 'GreBlu9508M':
            dist_l2a *= 4

        data['bird'].append(bird)
        data['block'].append(block)
        data['segment'].append(segment)
        data['hemi'].append(hemi)
        data['dm'].append(dist_midline)
        data['dl'].append(dist_l2a)
        data['r2'].append(gdf.r2.values[0])
        data['reg'].append(reg)
        data['electrode'].append(electrode)
        data['aprop'].append(aprop)

    df = pd.DataFrame(data)
    i = ~np.isnan(df.dm) & ~np.isnan(df.dl) & ~np.isnan(df.r2) & (df.r2 > 0)
    df = df[i]
    print(df.describe())

    def _plot_map(_pdata, _ax, _cmap, _maxval, _bgcolor=None, _perf_alpha=False, _plot_region=False,
                  _msize=60, _region_only=False):
        """Scatter the electrodes in ``_pdata['df']`` on ``_ax``, coloured by R2."""
        if _bgcolor is not None:
            # set_axis_bgcolor is not available in newer matplotlib releases;
            # prefer set_facecolor and fall back for old installations
            if hasattr(_ax, 'set_facecolor'):
                _ax.set_facecolor(_bgcolor)
            else:
                _ax.set_axis_bgcolor(_bgcolor)

        _pval = _pdata['df'].r2.values
        _x = _pdata['df'].dm.values
        _y = _pdata['df'].dl.values
        _regs = _pdata['df'].reg.values

        plt.sca(_ax)
        _alpha = np.ones([len(_pval)])
        if _perf_alpha:
            # NOTE(review): _alpha and _clrs are never passed to scatter, and the
            # in-place division operates on the array returned by ``.values``.
            _alpha = _pdata['df'].r2.values
            _alpha /= _alpha.max()
            _alpha[_alpha > 0.9] = 1.
            _clrs = _cmap(_pval / _maxval)
        else:
            _clrs = _cmap(_pval / _maxval)

        if not _region_only:
            plt.scatter(_x, _y, c=_pval, marker='o', cmap=_cmap, vmin=0, s=_msize, alpha=0.6)
            _cbar = plt.colorbar(label='Decoder R2')
            # reformat the colorbar tick labels to two decimals
            _new_ytks = ['%0.2f' % float(_yt.get_text()) for _yt in _cbar.ax.get_yticklabels()]
            _cbar.ax.set_yticklabels(_new_ytks)

        plt.xlabel('Dist to Midline (mm)')
        plt.ylabel('Dist to L2A (mm)')
        plt.xlim(0, 2.5)
        plt.ylim(-1, 1)

        if _plot_region:
            for _k, (_xx, _yy) in enumerate(zip(_x, _y)):
                # both tests must look at the label of the point being drawn (_k);
                # skip 'HP', unknown and hyphenated region labels
                if _regs[_k] not in ['HP', '?'] and '-' not in _regs[_k]:
                    plt.text(_xx, _yy, _regs[_k], fontsize=10, color='k', alpha=0.7)

    def rb_cmap(x):
        """Map values in [-1, 1] to RGB: positive -> red channel, negative -> blue channel."""
        assert np.abs(x).max() <= 1
        _rgb = np.zeros([len(x), 3])
        _pos = x >= 0
        _neg = x < 0

        _rgb[_pos, 0] = x[_pos]
        _rgb[_neg, 2] = np.abs(x[_neg])

        return _rgb

    figsize = (23, 13)
    fig = plt.figure(figsize=figsize)
    fig.subplots_adjust(left=0.05, right=0.98, hspace=0.25, wspace=0.25)

    nrows = 2
    ncols = 3
    for k, aprop in enumerate(aprops_to_show):
        ax = plt.subplot(nrows, ncols, k + 1)
        i = df.aprop == aprop
        max_r2 = df[i].r2.max()
        print('k=%d, %s: max_r2=%0.2f' % (k, aprop, max_r2))
        _plot_map({'df': df[i]}, ax, plt.cm.afmhot_r, max_r2, _bgcolor='w')
        plt.title(ACOUSTIC_PROP_NAMES[aprop])

    ax = plt.subplot(nrows, ncols, 5)
    plot_r2_region_prop(ax)

    fname = os.path.join(get_this_dir(), 'single_electrode_decoder_r2.svg')
    plt.savefig(fname, facecolor='w', edgecolor='none')

    plt.show()
def export_dfs(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Export multi-electrode decoder performances and per-band likelihood ratios.

    For every (bird, block, segment, hemi) site in ``agg.df`` (bird
    'BlaBro09xxF' excluded) and for every band ``b`` in ``0..len(agg.freqs)``
    one row is added to the multi-electrode table.  ``b == 0`` selects rows
    with ``exfreq == False``; ``b > 0`` selects rows with ``exfreq == True``
    for that band.  A row holds, per acoustic property (plus 'category') and
    per decomposition:

    * ``perf_<aprop>_<t>``: ``pcc`` for 'category', ``r2`` otherwise
      (0 for decompositions in the no-likelihood-ratio list when ``b > 0``)
    * ``lkrat_<aprop>_<t>`` for ``t`` in ('lfp', 'spike'): 0 for band 0, otherwise
      ``2 * (band likelihood - band-0 likelihood)`` divided by a threshold
      derived from a chi-square density (1 for 'category').

    Two files are written to ``<data_dir>/aggregate``:
    ``multi_electrode_perfs.csv`` (the table above) and
    ``multi_electrode_perfs_for_glm.csv`` (band-0 performances in long format).
    Single-electrode and single-cell tables are not produced by this function.

    If any (site, band, property, decomposition) lookup does not match exactly
    one row, diagnostics are printed and the function returns immediately
    without writing either file.

    :param agg: aggregator exposing ``df``, ``freqs``, ``acoustic_props`` and
        ``index2electrode``.
    :param data_dir: directory containing the ``aggregate`` sub-directory.
    """
    freqs = agg.freqs

    # read electrode data
    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data.csv'))

    decomp_list = ['lfp', 'spike', 'spike_rate']
    # decompositions for which no likelihood ratio is computed
    no_lkrat_list = ['spike_rate']
    # (column label, value of the 'decomp' column in agg.df)
    pair_list = [('lfp', 'locked'), ('spike', 'spike_psd'), ('spike_rate', 'spike_rate')]
    for ncomp in range(1, 12):
        for prefix in ('locked_pca', 'spike_psd_pca'):
            name = '%s_%d' % (prefix, ncomp)
            decomp_list.append(name)
            no_lkrat_list.append(name)
            pair_list.append((name, name))

    anames = agg.acoustic_props + ['category']

    # initialize multi electrode dataset dictionary
    multi_electrode_data = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(), 'band': list()}
    for aprop in anames:
        for t in decomp_list:
            multi_electrode_data['perf_%s_%s' % (aprop, t)] = list()
            if t not in no_lkrat_list:
                multi_electrode_data['lkrat_%s_%s' % (aprop, t)] = list()

    # initialize multielectrode performance dictionary (long format, band 0 only)
    me_perf_data = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(),
                    'perf': list(), 'decomp': list(), 'aprop': list()}

    nbands = len(freqs)

    # grid on which the chi-square density is evaluated; identical for every site
    chi2_x = np.linspace(0, 100, 10000)

    i = agg.df.bird != 'BlaBro09xxF'
    g = agg.df[i].groupby(['bird', 'block', 'segment', 'hemi'])
    for (bird, block, segment, hemi), gdf in g:

        wtup = (bird, block, segment, hemi)
        index2electrode = agg.index2electrode[wtup]

        # compute the number of cells; it is used as the chi-square dof for 'spike'
        i = (gdf.e1 != -1) & (gdf.e1 == gdf.e2) & (gdf.cell_index != -1) & (gdf.decomp == 'spike_psd') & \
            (gdf.exel == False) & (gdf.exfreq == False) & (gdf.aprop == 'q2')
        ncells = i.sum()

        # get the region by electrode; also verifies that every electrode of the
        # site has exactly one row in the electrode metadata
        electrode2region = dict()
        for e in index2electrode:
            i = (edata.bird == bird) & (edata.block == block) & \
                (edata.hemisphere == hemi) & (edata.electrode == e)
            assert i.sum() == 1
            electrode2region[e] = clean_region(edata.region[i].values[0])

        # collect multi-electrode multi-band dataset
        band0_perfs = None
        for b in range(nbands + 1):
            exfreq = b > 0

            perfs = dict()
            for aprop in anames:
                for t, decomp in pair_list:

                    if decomp in no_lkrat_list and b > 0:
                        perfs['perf_%s_%s' % (aprop, t)] = 0
                        continue

                    # get multielectrode decoder performance
                    i = (gdf.e1 == -1) & (gdf.e2 == -1) & (gdf.cell_index == -1) & (gdf.band == b) & \
                        (gdf.exfreq == exfreq) & (gdf.exel == False) & (gdf.aprop == aprop) & \
                        (gdf.decomp == decomp)

                    if i.sum() != 1:
                        print('len(gdf)=%d' % len(gdf))
                        print(gdf)
                        print("Zero or more than 1 result for (%s, %s, %s, %s), decomp=%s, band=%d, aprop=%s, exfreq=%d, exel=%d: i.sum()=%d" %
                              (bird, block, segment, hemi, decomp, b, aprop, exfreq, False, i.sum()))
                        # abort the whole export; nothing is written
                        return

                    if aprop == 'category':
                        mperf = gdf.pcc[i].values[0]
                    else:
                        mperf = gdf.r2[i].values[0]
                    perfs['perf_%s_%s' % (aprop, t)] = mperf

                    lk = gdf.likelihood[i].values[0]
                    perfs['lk_%s_%s' % (aprop, t)] = lk

                    eff_dof = gdf.effective_dof[i].values[0]
                    perfs['dof_%s_%s' % (aprop, t)] = eff_dof

                    if b == 0:
                        me_perf_data['bird'].append(bird)
                        me_perf_data['block'].append(block)
                        me_perf_data['segment'].append(segment)
                        me_perf_data['hemi'].append(hemi)
                        me_perf_data['perf'].append(mperf)
                        me_perf_data['aprop'].append(aprop)
                        me_perf_data['decomp'].append(decomp)

            multi_electrode_data['bird'].append(bird)
            multi_electrode_data['block'].append(block)
            multi_electrode_data['segment'].append(segment)
            multi_electrode_data['hemi'].append(hemi)
            multi_electrode_data['band'].append(b)
            for k, v in perfs.items():
                if k.startswith('perf'):
                    multi_electrode_data[k].append(v)

            if b == 0:
                # band 0 is the reference against which the other bands are compared
                band0_perfs = perfs
                for aprop in anames:
                    for t in ['lfp', 'spike']:
                        multi_electrode_data['lkrat_%s_%s' % (aprop, t)].append(0)
            else:
                # compute the likelihood ratio for each acoustic property on this band
                for aprop in anames:
                    for t in ['lfp', 'spike']:
                        full_likelihood = band0_perfs['lk_%s_%s' % (aprop, t)]
                        leave_one_out_likelihood = perfs['lk_%s_%s' % (aprop, t)]
                        lkrat = 2 * (leave_one_out_likelihood - full_likelihood)

                        # effective degrees of freedom are only reported in the diagnostic below
                        full_dof = band0_perfs['dof_%s_%s' % (aprop, t)]
                        loo_dof = perfs['dof_%s_%s' % (aprop, t)]

                        if aprop != 'category':
                            if t == 'lfp':
                                dof = 16
                            elif t == 'spike':
                                dof = ncells

                            # threshold = smallest grid point where the chi-square
                            # density exceeds 0.01
                            p = chi2.pdf(chi2_x, dof)
                            pi = p > 0.01
                            if pi.sum() == 0:
                                print('**** No thresh for aprop=%s, lkrat=%0.3f, full_dof=%0.2f, loo_dof=%0.2f' %
                                      (aprop, lkrat, full_dof, loo_dof))
                                sig_thresh = 1.
                            else:
                                sig_thresh = min(chi2_x[pi])
                        else:
                            sig_thresh = 1.

                        lkrat /= sig_thresh
                        multi_electrode_data['lkrat_%s_%s' % (aprop, t)].append(lkrat)

    df_me = pd.DataFrame(multi_electrode_data)
    df_me.to_csv(os.path.join(data_dir, 'aggregate', 'multi_electrode_perfs.csv'), index=False)

    df_me_perf = pd.DataFrame(me_perf_data)
    df_me_perf.to_csv(os.path.join(data_dir, 'aggregate', 'multi_electrode_perfs_for_glm.csv'), index=False)
def get_encoder_weights_squared(agg, decomp, data_dir='/auto/tdrive/mschachter/data'):
    """Collect normalized squared encoder weights and average them by frequency and by region.

    For every site row of ``agg.df`` with the given *decomp*, and for every
    (electrode, frequency), the weight vector ``agg.encoder_weights[wkey][k, j, :]``
    is squared and divided by its sum.  The vectors are stacked into ``Wsq``;
    ``wdf`` records region, frequency, encoder performance and the row index
    into ``Wsq`` for each of them.

    :param agg: aggregator exposing ``df``, ``encoder_perfs``, ``encoder_weights``
        and ``index2electrode``.
    :param decomp: value of ``agg.df.decomp`` to select (at least one row is asserted).
    :param data_dir: directory containing the ``aggregate`` sub-directory.
    :return: ``(wdf, Wsq, Wsq_by_freq, Wsq_by_reg)``; ``Wsq_by_freq`` averages the
        rows of each frequency whose encoder performance exceeds 0.05,
        ``Wsq_by_reg`` averages all rows of each listed region.
    """
    freqs, lags = get_freqs_and_lags()

    selector = agg.df.decomp == decomp
    assert selector.sum() > 0
    sites = agg.df[selector].groupby(['bird', 'block', 'segment', 'hemi'])

    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data.csv'))

    wdata = {'region': [], 'freq': [], 'xindex': [], 'eperf': []}
    Wsq = []

    for (bird, block, seg, hemi), site_df in sites:
        assert len(site_df) == 1

        wkey = site_df['wkey'].values[0]
        _iindex = site_df['iindex'].values[0]  # read but not used further

        eperf = agg.encoder_perfs[wkey]
        eweights = agg.encoder_weights[wkey]
        index2electrode = agg.index2electrode[wkey]

        for eidx, electrode in enumerate(index2electrode):
            match = (edata.bird == bird) & (edata.block == block) & \
                    (edata.hemisphere == hemi) & (edata.electrode == electrode)
            assert match.sum() == 1
            reg = clean_region(edata[match].region.values[0])

            for fidx, f in enumerate(freqs):
                weights = eweights[eidx, fidx, :]

                wdata['eperf'].append(eperf[eidx, fidx])
                wdata['region'].append(reg)
                wdata['freq'].append(int(f))
                # position this vector will take in Wsq
                wdata['xindex'].append(len(Wsq))

                # squared weights, normalized to sum to one
                squared = weights ** 2
                squared /= squared.sum()
                Wsq.append(squared)

    wdf = pd.DataFrame(wdata)
    Wsq = np.array(Wsq)

    nprops = len(USED_ACOUSTIC_PROPS)

    # compute the average encoder weights by frequency
    r2_thresh = 0.05
    Wsq_by_freq = np.zeros([nprops, len(freqs)])
    for col, f in enumerate(freqs):
        keep = (wdf.freq == int(f)) & (wdf.eperf > r2_thresh)
        rows = wdf.xindex[keep].values
        Wsq_by_freq[:, col] = Wsq[rows, :].mean(axis=0)

    # compute the average encoder weights by region
    regs = ['L2', 'CMM', 'CML', 'L1', 'L3', 'NCM']
    Wsq_by_reg = np.zeros([nprops, len(regs)])
    for col, reg in enumerate(regs):
        rows = wdf.xindex[wdf.region == reg].values
        Wsq_by_reg[:, col] = Wsq[rows, :].mean(axis=0)

    return wdf, Wsq, Wsq_by_freq, Wsq_by_reg
def export_pairwise_encoder_datasets_for_glm(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Export pairwise (electrode x electrode x lag) encoder performances and weights.

    Uses the rows of ``agg.df`` with ``decomp == 'self+cross_locked'``.  For
    every site, every electrode pair ``(index2electrode[k], index2electrode[j])``
    with ``j < k`` and every lag, one performance row (``eperf[k, j, li]``)
    is written, plus one weight row per property in ``USED_ACOUSTIC_PROPS``
    (``eweights[k, j, li, ai]``).  Weights are divided by the site's largest
    absolute weight.  ``regions`` is ``'reg2->reg1'`` for negative lags and
    ``'reg1->reg2'`` otherwise; ``dist`` is the Euclidean distance between the
    two electrodes in (dist_midline, dist_l2a) coordinates.

    Output files (in ``<data_dir>/aggregate``):
    ``pairwise_encoder_perfs_for_glm.csv`` and
    ``pairwise_encoder_weights_for_glm.csv``.

    :param agg: aggregator exposing ``df``, ``index2electrode``, ``encoder_perfs``
        and ``encoder_weights``; its stored weights are left untouched.
    :param data_dir: directory containing the ``aggregate`` sub-directory.
    """
    freqs, lags = get_freqs_and_lags()

    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data+dist.csv'))

    data = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(),
            'electrode1': list(), 'electrode2': list(), 'regions': list(), 'site': list(),
            'lag': list(), 'r2': list(), 'dist': list()}

    weight_data = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(),
                   'electrode1': list(), 'electrode2': list(), 'regions': list(), 'site': list(),
                   'lag': list(), 'aprop': list(), 'w': list(), 'dist': list()}

    decomp = 'self+cross_locked'
    i = agg.df.decomp == decomp
    g = agg.df[i].groupby(['bird', 'block', 'segment', 'hemi'])
    for (bird, block, seg, hemi), gdf in g:

        assert len(gdf) == 1

        wkey = gdf['wkey'].values[0]
        index2electrode = agg.index2electrode[wkey]

        eperf = agg.encoder_perfs[wkey]

        # normalize weights into a new array: an in-place division would
        # permanently rescale the weights stored on the aggregator
        raw_weights = agg.encoder_weights[wkey]
        eweights = raw_weights / np.abs(raw_weights).max()

        site = '%s_%s_%s_%s' % (bird, block, seg, hemi)

        # look up region and anatomical location once per electrode instead of once per pair
        regions = list()
        locations = list()
        for e in index2electrode:
            regi = (edata.bird == bird) & (edata.block == block) & \
                   (edata.hemisphere == hemi) & (edata.electrode == e)
            assert regi.sum() == 1
            erow = edata[regi]
            regions.append(clean_region(erow.region.values[0]))
            locations.append(np.array([erow.dist_midline.values[0], erow.dist_l2a.values[0]]))

        for k, e1 in enumerate(index2electrode):
            reg1 = regions[k]
            eloc1 = locations[k]

            for j in range(k):
                e2 = index2electrode[j]
                reg2 = regions[j]
                eloc2 = locations[j]

                # compute the distance between electrodes in anatomical coordinates
                edist = np.linalg.norm(eloc1 - eloc2)

                for li, lag in enumerate(lags):
                    r2 = eperf[k, j, li]

                    # the sign of the lag decides the order of the region label
                    if lag < 0:
                        regs = '%s->%s' % (reg2, reg1)
                    else:
                        regs = '%s->%s' % (reg1, reg2)

                    data['bird'].append(bird)
                    data['block'].append(block)
                    data['segment'].append(seg)
                    data['hemi'].append(hemi)
                    data['electrode1'].append(e1)
                    data['electrode2'].append(e2)
                    data['regions'].append(regs)
                    data['site'].append(site)
                    data['lag'].append(int(lag))
                    data['r2'].append(r2)
                    data['dist'].append(edist)

                    for ai, aprop in enumerate(USED_ACOUSTIC_PROPS):
                        w = eweights[k, j, li, ai]

                        weight_data['bird'].append(bird)
                        weight_data['block'].append(block)
                        weight_data['segment'].append(seg)
                        weight_data['hemi'].append(hemi)
                        weight_data['electrode1'].append(e1)
                        weight_data['electrode2'].append(e2)
                        weight_data['regions'].append(regs)
                        weight_data['site'].append(site)
                        weight_data['lag'].append(int(lag))
                        weight_data['aprop'].append(aprop)
                        weight_data['w'].append(w)
                        weight_data['dist'].append(edist)

    df = pd.DataFrame(data)
    df.to_csv(os.path.join(data_dir, 'aggregate', 'pairwise_encoder_perfs_for_glm.csv'),
              header=True, index=False)

    wdf = pd.DataFrame(weight_data)
    wdf.to_csv(os.path.join(data_dir, 'aggregate', 'pairwise_encoder_weights_for_glm.csv'),
               header=True, index=False)
def export_psd_encoder_datasets_for_glm(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Export encoder performances for the 'full_psds' and 'spike_rate' decompositions.

    * 'full_psds': one row per (electrode, frequency) with ``r2 = eperf[k, j]``
      and ``cell_index = -1``.
    * 'spike_rate': one row per cell with ``r2 = eperf[ci]``, ``freq = -1`` and
      the electrode given by ``agg.cell_index2electrode[wkey][ci]``.

    Regions come from ``<data_dir>/aggregate/electrode_data.csv``; the table is
    written to ``<data_dir>/aggregate/encoder_perfs_for_glm.csv``.

    :param agg: must be an AcousticEncoderDecoderAggregator (asserted).
    :param data_dir: directory containing the ``aggregate`` sub-directory.
    """
    freqs, lags = get_freqs_and_lags()

    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data.csv'))

    data = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(),
            'electrode': list(), 'cell_index': list(), 'region': list(), 'site': list(),
            'freq': list(), 'r2': list()}

    decomps = ['full_psds', 'spike_rate']

    assert isinstance(agg, AcousticEncoderDecoderAggregator)

    for decomp in decomps:
        i = agg.df.decomp == decomp
        g = agg.df[i].groupby(['bird', 'block', 'segment', 'hemi'])
        for (bird, block, seg, hemi), gdf in g:

            assert len(gdf) == 1

            wkey = gdf['wkey'].values[0]
            eperf = agg.encoder_perfs[wkey]
            index2electrode = agg.index2electrode[wkey]
            cell_index2electrode = agg.cell_index2electrode[wkey]

            site = '%s_%s_%s_%s' % (bird, block, seg, hemi)

            # The region of an electrode does not depend on the frequency or the
            # cell, so it is looked up once per electrode of this site and reused.
            region_cache = dict()

            def _region_of(e):
                if e not in region_cache:
                    regi = (edata.bird == bird) & (edata.block == block) & \
                           (edata.hemisphere == hemi) & (edata.electrode == e)
                    assert regi.sum() == 1
                    region_cache[e] = clean_region(edata[regi].region.values[0])
                return region_cache[e]

            if decomp.endswith('psds'):
                for k, e in enumerate(index2electrode):
                    reg = _region_of(e)

                    for j, f in enumerate(freqs):
                        data['bird'].append(bird)
                        data['block'].append(block)
                        data['segment'].append(seg)
                        data['hemi'].append(hemi)
                        data['electrode'].append(e)
                        data['region'].append(reg)
                        data['cell_index'].append(-1)
                        data['site'].append(site)
                        data['freq'].append(int(f))
                        data['r2'].append(eperf[k, j])

            elif decomp == 'spike_rate':
                for ci, e in enumerate(cell_index2electrode):
                    reg = _region_of(e)

                    data['bird'].append(bird)
                    data['block'].append(block)
                    data['segment'].append(seg)
                    data['hemi'].append(hemi)
                    data['electrode'].append(e)
                    data['region'].append(reg)
                    data['cell_index'].append(ci)
                    data['site'].append(site)
                    data['freq'].append(-1)
                    data['r2'].append(eperf[ci])

    df = pd.DataFrame(data)
    df.to_csv(os.path.join(data_dir, 'aggregate', 'encoder_perfs_for_glm.csv'),
              header=True, index=False)