示例#1
0
def export_weight_ds(agg, data_dir='/auto/tdrive/mschachter/data', decomp='full_psds'):
    """Export decoder weights as a long-format CSV table.

    For every (bird, block, segment, hemi) group of ``agg.df`` whose ``decomp``
    column equals ``decomp``, one row is written per electrode, frequency and
    acoustic property. Each row holds the decoder weight (``w``), the decoder
    performance of that property (``r2``) and the electrode's region and
    distances looked up in ``<data_dir>/aggregate/electrode_data+dist.csv``.

    The table is saved to ``<data_dir>/aggregate/decoder_weights_for_glm.csv``.
    Nothing is returned.

    :param agg: must be an AcousticEncoderDecoderAggregator (asserted).
    :param data_dir: directory that contains the ``aggregate`` folder.
    :param decomp: value of the ``decomp`` column selecting the rows to export.
    """
    # the lags returned by the helper are not needed here
    freqs, _unused_lags = get_freqs_and_lags()

    assert isinstance(agg, AcousticEncoderDecoderAggregator)
    aggregate_dir = os.path.join(data_dir, 'aggregate')
    electrode_table = pd.read_csv(os.path.join(aggregate_dir, 'electrode_data+dist.csv'))

    column_names = ['bird', 'block', 'segment', 'hemi',
                    'electrode', 'reg', 'dm', 'dl',
                    'aprop', 'r2', 'f', 'w']
    data = {name: list() for name in column_names}

    aprops = USED_ACOUSTIC_PROPS
    num_props = len(aprops)

    selected = agg.df[agg.df.decomp == decomp]
    for (bird, block, segment, hemi), site_df in selected.groupby(['bird', 'block', 'segment', 'hemi']):

        # exactly one aggregator row is expected per recording site
        assert len(site_df) == 1
        wkey = site_df.wkey.values[0]
        electrodes = agg.index2electrode[wkey]
        expected_shape = (len(electrodes), len(freqs), num_props)

        dweights = agg.decoder_weights[wkey]
        assert dweights.shape == expected_shape, "dweights.shape=%s" % str(dweights.shape)

        dperfs = agg.decoder_perfs[wkey]
        assert len(dperfs) == num_props

        for e_index, electrode in enumerate(electrodes):

            # locate the single metadata row describing this electrode
            match = ((electrode_table.bird == bird) & (electrode_table.block == block) &
                     (electrode_table.hemisphere == hemi) & (electrode_table.electrode == electrode))
            assert match.sum() == 1
            region = clean_region(electrode_table.region[match].values[0])
            dist_l2a = electrode_table.dist_l2a[match].values[0]
            dist_midline = electrode_table.dist_midline[match].values[0]

            # NOTE(review): this bird's L2A distance is scaled by 4; the reason is
            # not visible in this function -- confirm against the data source.
            if bird == 'GreBlu9508M':
                dist_l2a *= 4

            for f_index, freq in enumerate(freqs):
                for p_index, aprop in enumerate(aprops):
                    row = (bird, block, segment, hemi,
                           electrode, region, dist_midline, dist_l2a,
                           aprop, dperfs[p_index], int(freq),
                           dweights[e_index, f_index, p_index])
                    for name, value in zip(column_names, row):
                        data[name].append(value)

    out_path = os.path.join(aggregate_dir, 'decoder_weights_for_glm.csv')
    pd.DataFrame(data).to_csv(out_path, header=True, index=False)
示例#2
0
def get_encoder_perf_data_for_psd(agg, ein=None):
    """Collect PSD encoder performance per electrode and frequency.

    Rows of ``agg.df`` with ``encoder_output == 'psd'`` and ``decomp == 'full'``
    are selected. When ``ein`` is None the encoder input must be ``'rate'`` or
    ``'both'``, otherwise it must equal ``ein``. For each selected weight key,
    one row is produced per electrode and per frequency in ``agg.freqs`` with
    the encoder performance ``eperfs[k, j]`` and the electrode's region and
    distances.

    The resulting DataFrame is written to a fixed CSV path (suffixed with
    ``ein`` when it is given) and also returned.

    NOTE(review): ``data_dir`` is not a parameter of this function; it is
    resolved from an enclosing/module scope that is not visible here.
    """
    encoder_input = agg.df.encoder_input
    if ein is not None:
        selected = (encoder_input == ein)
    else:
        selected = (encoder_input == 'rate') | (encoder_input == 'both')
    selected = selected & ((agg.df.encoder_output == 'psd') & (agg.df.decomp == 'full'))

    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data+dist.csv'))

    column_names = ['bird', 'block', 'segment', 'hemi',
                    'electrode', 'region', 'f', 'r2',
                    'dist_l2a', 'dist_midline', 'ein']
    pdata = {name: list() for name in column_names}

    for wkey in agg.df.wkey[selected].values:
        # the key is expected to have exactly seven underscore-separated fields
        bird, block, segment, hemi, key_ein, _key_eout, _key_decomp = wkey.split('_')

        eperfs = agg.encoder_perfs[wkey]
        electrodes = agg.index2electrode[wkey]

        for e_index, electrode in enumerate(electrodes):

            # locate the single metadata row describing this electrode
            match = ((edata.bird == bird) & (edata.block == block) &
                     (edata.hemisphere == hemi) & (edata.electrode == electrode))
            assert match.sum() == 1
            region = clean_region(edata.region[match].values[0])
            dist_l2a = edata.dist_l2a[match].values[0]
            dist_midline = edata.dist_midline[match].values[0]

            # NOTE(review): this bird's L2A distance is scaled by 4; the reason is
            # not visible in this function -- confirm against the data source.
            if bird == 'GreBlu9508M':
                dist_l2a *= 4

            for f_index, freq in enumerate(agg.freqs):
                row = (bird, block, segment, hemi,
                       electrode, region, int(freq), eperfs[e_index, f_index],
                       dist_l2a, dist_midline, key_ein)
                for name, value in zip(column_names, row):
                    pdata[name].append(value)

    df = pd.DataFrame(pdata)
    if ein is None:
        out_path = '/auto/tdrive/mschachter/data/aggregate/lfp_encoder_perfs.csv'
    else:
        out_path = '/auto/tdrive/mschachter/data/aggregate/lfp_encoder_perfs_%s.csv' % ein
    df.to_csv(out_path, index=False, header=True)

    return df
示例#3
0
def export_ds(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Export single-electrode decoder performance with electrode metadata.

    One row is built for each (bird, block, segment, hemi, electrode, aprop)
    group of ``agg.df``, combining the group's ``r2`` with the electrode's
    region and distances from ``<data_dir>/aggregate/electrode_data+dist.csv``
    (rows of bird ``BlaBro09xxF`` are dropped from that table first).

    The complete table is written to
    ``<data_dir>/aggregate/single_electrode_decoder.csv``. The returned
    DataFrame contains only the rows whose ``dist_l2a`` and ``dist_midline``
    are both non-NaN.
    """
    column_names = ['bird', 'block', 'segment', 'hemi', 'electrode',
                    'aprop', 'region', 'dist_midline', 'dist_l2a', 'r2']
    data = {name: list() for name in column_names}

    aggregate_dir = os.path.join(data_dir, 'aggregate')
    edata = pd.read_csv(os.path.join(aggregate_dir, 'electrode_data+dist.csv'))
    edata = edata[edata.bird != 'BlaBro09xxF']

    group_cols = ['bird', 'block', 'segment', 'hemi', 'electrode', 'aprop']
    for (bird, block, segment, hemi, electrode, aprop), group_df in agg.df.groupby(group_cols):

        # each combination must correspond to exactly one decoder result
        assert len(group_df) == 1

        # locate the single metadata row describing this electrode
        match = ((edata.bird == bird) & (edata.block == block) &
                 (edata.hemisphere == hemi) & (edata.electrode == electrode))
        assert match.sum() == 1
        region = clean_region(edata.region[match].values[0])
        dist_l2a = edata.dist_l2a[match].values[0]
        dist_midline = edata.dist_midline[match].values[0]

        row = (bird, block, segment, hemi, electrode,
               aprop, region, dist_midline, dist_l2a, group_df.r2.values[0])
        for name, value in zip(column_names, row):
            data[name].append(value)

    df = pd.DataFrame(data)
    # the mask is computed before writing, but the CSV receives every row
    has_both_distances = ~(np.isnan(df.dist_l2a) | np.isnan(df.dist_midline))

    df.to_csv(os.path.join(aggregate_dir, 'single_electrode_decoder.csv'), header=True, index=False)

    return df[has_both_distances]
示例#4
0
def stats(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Pick the best hyperparameters per site, export them and plot CC values.

    For each (bird, block, segment, hemi) group of ``agg.df`` the
    (lambda1, lambda2, n_unit) combination with the lowest ``err`` is chosen.
    One row per electrode of that combination is collected (linear_cc, cc,
    region, md5, ...) and the table is written to
    ``<data_dir>/aggregate/rnn_best.csv``. A scatter plot of ``cc`` against
    ``linear_cc`` is then shown.

    :param agg: aggregator exposing a ``df`` DataFrame with the columns used below.
    :param data_dir: directory containing the ``aggregate`` folder; used both
        for reading ``electrode_data+dist.csv`` and for writing ``rnn_best.csv``.
    :raises AssertionError: if the best combination does not have exactly 16
        rows, or an electrode does not match exactly one row.
    """
    data = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(), 'electrode': list(),
            'linear_cc': list(), 'cc': list(), 'err': list(),
            'lambda1': list(), 'lambda2': list(), 'n_unit': list(), 'region': list(), 'md5': list()}

    aggregate_dir = os.path.join(data_dir, 'aggregate')
    edata = pd.read_csv(os.path.join(aggregate_dir, 'electrode_data+dist.csv'))

    for (bird, block, segment, hemi), gdf in agg.df.groupby(['bird', 'block', 'segment', 'hemi']):

        # gather the error of every hyperparameter combination tried at this site
        perfs = list()
        for (lambda1, lambda2, n_unit), ggdf in gdf.groupby(['lambda1', 'lambda2', 'n_unit']):
            perfs.append({'err': ggdf.err.values[0], 'lambda1': lambda1,
                          'lambda2': lambda2, 'n_unit': n_unit})

        # lowest error first
        perfs.sort(key=operator.itemgetter('err'))
        best = perfs[0]
        best_lambda1 = best['lambda1']
        best_lambda2 = best['lambda2']
        best_n_unit = best['n_unit']
        best_err = best['err']

        # single-argument parenthesised form prints identically on Python 2 and 3
        print('err=%0.3f, lambda1=%0.3f, lambda2=%0.3f, n_unit=%d' % (best_err, best_lambda1, best_lambda2, best_n_unit))

        i = (gdf.lambda1 == best_lambda1) & (gdf.lambda2 == best_lambda2) & (gdf.n_unit == best_n_unit)
        # the best combination is expected to cover exactly 16 rows
        assert i.sum() == 16, 'i.sum()=%d' % i.sum()

        for e in gdf[i].electrode.unique():
            ii = i & (gdf.electrode == e)
            assert ii.sum() == 1, 'ii.sum()=%d' % ii.sum()

            # locate the single metadata row describing this electrode
            iii = (edata.bird == bird) & (edata.block == block) & (edata.hemisphere == hemi) & (edata.electrode == e)
            assert iii.sum() == 1, 'iii.sum()=%d' % iii.sum()
            reg = clean_region(edata[iii].region.values[0])

            best_row = gdf[ii]
            data['bird'].append(bird)
            data['block'].append(block)
            data['segment'].append(segment)
            data['hemi'].append(hemi)
            data['lambda1'].append(best_lambda1)
            data['lambda2'].append(best_lambda2)
            data['n_unit'].append(best_n_unit)
            data['err'].append(best_err)
            data['electrode'].append(e)
            data['linear_cc'].append(best_row.linear_cc.values[0])
            data['cc'].append(best_row.cc.values[0])
            data['region'].append(reg)
            data['md5'].append(best_row.md5.values[0])

    df = pd.DataFrame(data)
    # write next to the input table instead of a hard-coded absolute path, so a
    # non-default data_dir is honoured (identical location for the default)
    df.to_csv(os.path.join(aggregate_dir, 'rnn_best.csv'), header=True, index=False)

    fig = plt.figure(figsize=(12, 10), facecolor='w')
    # identity line for reference
    x = np.linspace(0, 1, 20)
    plt.plot(x, x, 'k-')
    plt.plot(df.linear_cc, df.cc, 'go', alpha=0.7, markersize=12)
    plt.xlabel('Linear CC')
    plt.ylabel('RNN CC')
    plt.xlim(0, 0.8)
    plt.ylim(0, 0.8)

    # saving the figure is currently disabled; only the target name is computed
    fname = os.path.join(get_this_dir(), 'linear_vs_rnn_cc.svg')
    # plt.savefig(fname, facecolor=fig.get_facecolor(), edgecolor='none')

    plt.show()
示例#5
0
def get_encoder_weight_data_for_psd(agg, include_sync=True, write_to_file=True):
    """Build a long-format table of PSD encoder weights.

    For every weight key of ``agg.df`` with ``encoder_input == 'both'``,
    ``encoder_output == 'psd'`` and ``decomp == 'full'``, rows are produced per
    electrode and per frequency in ``agg.freqs``:

    * one ``wtype == 'rate'`` row per cell, taken from ``W[0, n]``;
    * when ``include_sync`` is true, one ``wtype == 'sync'`` row per pair of
      distinct cells (n2 < n1), taken from ``W[n1+1, n2]``.

    Fields that do not apply to a row type are filled with ``-1`` (or ``0`` for
    ``cells_same_electrode`` on rate rows).

    The DataFrame is returned and, when ``write_to_file`` is true, also written
    to a fixed CSV path.

    NOTE(review): ``data_dir`` is not a parameter of this function; it is
    resolved from an enclosing/module scope that is not visible here.
    """
    aggregate_dir = os.path.join(data_dir, 'aggregate')
    edata = pd.read_csv(os.path.join(aggregate_dir, 'electrode_data+dist.csv'))
    cdata = pd.read_csv(os.path.join(aggregate_dir, 'cell_data.csv'))

    e2e_dists = get_e2e_dists()

    # lookup table: (bird, block, segment, hemi, cell) -> (rate, rate_std, distances)
    print('Creating cell lookup table')
    cell_data = dict()
    self_rows = cdata[cdata.cell1 == cdata.cell2]
    for (bird, block, segment, hemi, ci), cell_df in self_rows.groupby(['bird', 'block', 'segment', 'hemi', 'cell1']):

        assert len(cell_df) == 1

        # mappings of the site this cell belongs to
        wkey = '%s_%s_%s_%s_%s_%s_full' % (bird, block, segment, hemi, 'both', 'psd')
        # looked up but not used afterwards; kept so a missing key still raises here
        _site_index2cell = agg.index2cell[wkey]
        site_electrodes = agg.index2electrode[wkey]
        site_cell2electrode = agg.cell_index2electrode[wkey]

        rate = cell_df.rate.values[0]
        # NOTE(review): rate_std is read from the ``rate`` column, exactly like
        # ``rate`` -- possibly a copy/paste slip; confirm against cell_data.csv.
        rate_std = cell_df.rate.values[0]
        cell_electrode = site_cell2electrode[ci]

        # distance from the cell's electrode to every electrode of the site
        pair_dists = e2e_dists[(bird, block, hemi)]
        edist = {e: pair_dists[(cell_electrode, e)] for e in site_electrodes}

        cell_data[(bird, block, segment, hemi, ci)] = (rate, rate_std, edist)

    print('Creating dataset....')
    column_names = ['bird', 'block', 'segment', 'hemi',
                    'electrode', 'region', 'f', 'w', 'r2',
                    'dist_l2a', 'dist_midline', 'wtype',
                    'cell_index',
                    'rate_mean', 'rate_std',
                    'sync_mean', 'sync_std',
                    'dist_from_electrode',
                    'dist_cell2cell',
                    'same_electrode', 'cells_same_electrode']
    wdata = {name: list() for name in column_names}

    def _append_row(**values):
        # add one value to every column so that all columns stay the same length
        for name in column_names:
            wdata[name].append(values[name])

    selected = (agg.df.encoder_input == 'both') & (agg.df.encoder_output == 'psd') & (agg.df.decomp == 'full')
    for wkey in agg.df.wkey[selected].values:
        # the key is expected to have exactly seven underscore-separated fields
        bird, block, segment, hemi, _key_ein, _key_eout, _key_decomp = wkey.split('_')

        eperfs = agg.encoder_perfs[wkey]
        eweights = agg.encoder_weights[wkey]

        index2electrode = agg.index2electrode[wkey]
        index2cell = agg.index2cell[wkey]
        cell_index2electrode = agg.cell_index2electrode[wkey]
        ncells = len(index2cell)

        for k, e in enumerate(index2electrode):

            # locate the single metadata row describing this electrode
            match = ((edata.bird == bird) & (edata.block == block) &
                     (edata.hemisphere == hemi) & (edata.electrode == e))
            assert match.sum() == 1
            reg = clean_region(edata.region[match].values[0])
            dist_l2a = edata.dist_l2a[match].values[0]
            dist_midline = edata.dist_midline[match].values[0]

            # NOTE(review): this bird's L2A distance is scaled by 4; the reason is
            # not visible in this function -- confirm against the data source.
            if bird == 'GreBlu9508M':
                dist_l2a *= 4

            for j, f in enumerate(agg.freqs):
                r2 = eperfs[k, j]
                W = eweights[k, j, :, :]
                # row 0 is indexed for rate weights, rows 1..ncells for pair weights
                assert W.shape == (ncells+1, ncells)

                # values shared by every row of this (electrode, frequency)
                common = dict(bird=bird, block=block, segment=segment, hemi=hemi,
                              electrode=e, region=reg, f=int(f), r2=r2,
                              dist_l2a=dist_l2a, dist_midline=dist_midline)

                # spike rate weights: one row per cell
                for n, ci in enumerate(index2cell):
                    rate, rate_std, edist = cell_data[(bird, block, segment, hemi, ci)]
                    cell_electrode = cell_index2electrode[ci]

                    _append_row(w=W[0, n],
                                wtype='rate',
                                cell_index=ci,
                                rate_mean=rate,
                                rate_std=rate_std,
                                sync_mean=-1,
                                sync_std=-1,
                                dist_from_electrode=edist[e],
                                dist_cell2cell=-1,
                                same_electrode=int(e == cell_electrode),
                                cells_same_electrode=0,
                                **common)

                if include_sync:
                    # synchrony weights: one row per unordered pair of distinct cells
                    for n1, ci1 in enumerate(index2cell):
                        _, _, edist1 = cell_data[(bird, block, segment, hemi, ci1)]
                        e1 = cell_index2electrode[ci1]

                        for n2 in range(n1):
                            ci2 = index2cell[n2]
                            _, _, edist2 = cell_data[(bird, block, segment, hemi, ci2)]
                            e2 = cell_index2electrode[ci2]

                            _append_row(w=W[n1+1, n2],
                                        wtype='sync',
                                        cell_index=-1,
                                        rate_mean=-1,
                                        rate_std=-1,
                                        sync_mean=-1,
                                        sync_std=-1,
                                        dist_from_electrode=(edist1[e] + edist2[e]) / 2.,
                                        dist_cell2cell=edist1[e2],
                                        same_electrode=int(e1 == e2 and e1 == e),
                                        cells_same_electrode=int(e1 == e2),
                                        **common)

    wdf = pd.DataFrame(wdata)
    if write_to_file:
        wdf.to_csv('/auto/tdrive/mschachter/data/aggregate/lfp_encoder_weights.csv', index=False)

    return wdf
示例#6
0
def plot_maps(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Plot single-electrode decoder R2 as anatomical scatter maps.

    A table with one row per (bird, block, segment, hemi, electrode, aprop)
    group of ``agg.df`` is built, joined with electrode region and distances
    from ``<data_dir>/aggregate/electrode_data+dist.csv``. Rows with NaN
    distances/r2 or non-positive r2 are dropped. One scatter map is drawn per
    acoustic property in ``APROPS_TO_SHOW`` on a 2x3 grid, subplot 5 is then
    handed to ``plot_r2_region_prop``, and the figure is saved as
    ``single_electrode_decoder_r2.svg`` in ``get_this_dir()`` and shown.

    :param agg: aggregator exposing a ``df`` DataFrame with the columns used below.
    :param data_dir: directory containing the ``aggregate`` folder.
    """
    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data+dist.csv'))

    data = {'bird': list(), 'block': list(), 'segment': list(), 'hemi': list(),
            'electrode': list(), 'reg': list(), 'dm': list(), 'dl': list(),
            'aprop': list(), 'r2': list()}

    df = agg.df
    # acoustic properties that get their own map
    aprops_to_show = APROPS_TO_SHOW

    # build a dataset that makes it easy to plot single decoder performance
    g = df.groupby(['bird', 'block', 'segment', 'hemi', 'electrode', 'aprop'])
    for (bird, block, segment, hemi, electrode, aprop), gdf in g:

        assert len(gdf) == 1

        # locate the single metadata row describing this electrode
        ei = (edata.bird == bird) & (edata.block == block) & (edata.hemisphere == hemi) & (edata.electrode == electrode)
        assert ei.sum() == 1
        reg = clean_region(edata.region[ei].values[0])
        dist_l2a = edata.dist_l2a[ei].values[0]
        dist_midline = edata.dist_midline[ei].values[0]

        # NOTE(review): this bird's L2A distance is scaled by 4; the reason is
        # not visible in this function -- confirm against the data source.
        if bird == 'GreBlu9508M':
            dist_l2a *= 4

        data['bird'].append(bird)
        data['block'].append(block)
        data['segment'].append(segment)
        data['hemi'].append(hemi)
        data['dm'].append(dist_midline)
        data['dl'].append(dist_l2a)
        data['r2'].append(gdf.r2.values[0])
        data['reg'].append(reg)
        data['electrode'].append(electrode)
        data['aprop'].append(aprop)

    df = pd.DataFrame(data)
    # keep only electrodes with known coordinates and a positive R2
    i = ~np.isnan(df.dm) & ~np.isnan(df.dl) & ~np.isnan(df.r2) & (df.r2 > 0)
    df = df[i]
    # single-argument parenthesised form prints identically on Python 2 and 3
    print(df.describe())

    def _plot_map(_pdata, _ax, _cmap, _maxval, _bgcolor=None, _perf_alpha=False, _plot_region=False, _msize=60, _region_only=False):
        # Draw one scatter map of R2 on _ax from the DataFrame in _pdata['df'].
        # _maxval and _perf_alpha are accepted for call compatibility only: the
        # values formerly derived from them were never used for drawing, and
        # computing them rescaled the r2 array in place.
        if _bgcolor is not None:
            # set_axis_bgcolor is gone from current matplotlib; prefer its
            # replacement and fall back for old installations
            if hasattr(_ax, 'set_facecolor'):
                _ax.set_facecolor(_bgcolor)
            else:
                _ax.set_axis_bgcolor(_bgcolor)
        _pval = _pdata['df'].r2.values
        _x = _pdata['df'].dm.values
        _y = _pdata['df'].dl.values
        _regs = _pdata['df'].reg.values

        plt.sca(_ax)

        if not _region_only:
            plt.scatter(_x, _y, c=_pval, marker='o', cmap=_cmap, vmin=0, s=_msize, alpha=0.6)

        _cbar = plt.colorbar(label='Decoder R2')
        # reformat the colorbar tick labels with two decimals
        _new_ytks = ['%0.2f' % float(_yt.get_text()) for _yt in _cbar.ax.get_yticklabels()]
        _cbar.ax.set_yticklabels(_new_ytks)

        plt.xlabel('Dist to Midline (mm)')
        plt.ylabel('Dist to L2A (mm)')
        plt.xlim(0, 2.5)
        plt.ylim(-1, 1)

        if _plot_region:
            for _k, (_xx, _yy) in enumerate(zip(_x, _y)):
                # both tests must look at the point being labelled; the second one
                # used to read the enclosing subplot counter instead of _k
                if _regs[_k] not in ['HP', '?'] and '-' not in _regs[_k]:
                    plt.text(_xx, _yy, _regs[_k], fontsize=10, color='k', alpha=0.7)

    def rb_cmap(x):
        # Map values in [-1, 1] to RGB rows: positive values go to the red
        # channel, negative magnitudes to the blue channel. Not called below.
        assert np.abs(x).max() <= 1
        _rgb = np.zeros([len(x), 3])
        _pos = x >= 0
        _neg = x < 0

        _rgb[_pos, 0] = x[_pos]
        _rgb[_neg, 2] = np.abs(x[_neg])

        return _rgb

    figsize = (23, 13)
    fig = plt.figure(figsize=figsize)
    fig.subplots_adjust(left=0.05, right=0.98, hspace=0.25, wspace=0.25)
    nrows = 2
    ncols = 3
    for k, aprop in enumerate(aprops_to_show):
        ax = plt.subplot(nrows, ncols, k+1)
        i = df.aprop == aprop
        max_r2 = df[i].r2.max()
        print('k=%d, %s: max_r2=%0.2f' % (k, aprop, max_r2))
        # _plot_map({'df':df[i]}, ax, magma, max_r2, _bgcolor='k', _perf_alpha=False, _plot_region=False)
        _plot_map({'df': df[i]}, ax, plt.cm.afmhot_r, max_r2, _bgcolor='w',)
        plt.title(ACOUSTIC_PROP_NAMES[aprop])

    ax = plt.subplot(nrows, ncols, 5)
    # _plot_map({'df': df[df.aprop == 'maxAmp']}, ax, plt.cm.afmhot_r, 1., _bgcolor='w', _plot_region=True, _region_only=True)
    plot_r2_region_prop(ax)

    fname = os.path.join(get_this_dir(), 'single_electrode_decoder_r2.svg')
    plt.savefig(fname, facecolor='w', edgecolor='none')

    plt.show()
示例#7
0
def export_dfs(agg, data_dir='/auto/tdrive/mschachter/data'):

    freqs = agg.freqs
    # read electrode data
    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data.csv'))

    decomp_list = ['lfp', 'spike', 'spike_rate']
    no_lkrat_list = ['spike_rate']
    pair_list = [('lfp', 'locked'), ('spike', 'spike_psd'), ('spike_rate', 'spike_rate')]

    for ncomp in range(1, 12):
        decomp_list.append('locked_pca_%d' % ncomp)
        decomp_list.append('spike_psd_pca_%d' % ncomp)
        no_lkrat_list.append('locked_pca_%d' % ncomp)
        no_lkrat_list.append('spike_psd_pca_%d' % ncomp)
        pair_list.append( ('locked_pca_%d' % ncomp, 'locked_pca_%d' % ncomp) )
        pair_list.append( ('spike_psd_pca_%d' % ncomp, 'spike_psd_pca_%d' % ncomp) )

    # initialize multi electrode dataset dictionary
    multi_electrode_data = {'bird':list(), 'block':list(), 'segment':list(), 'hemi':list(), 'band':list()}
    anames = agg.acoustic_props + ['category']
    for aprop in anames:
        for t in decomp_list:
            multi_electrode_data['perf_%s_%s' % (aprop, t)] = list()
            if t not in no_lkrat_list:
                multi_electrode_data['lkrat_%s_%s' % (aprop, t)] = list()

    # initialize multielectrode performance dictionary (a little bit different than the dataset dictionary
    me_perf_data = {'bird':list(), 'block':list(), 'segment':list(), 'hemi':list(),
                    'perf':list(), 'decomp':list(), 'aprop':list()}

    # initialize single electrode dataset dictionary
    single_electrode_data = {'bird':list(), 'block':list(), 'segment':list(), 'hemi':list(), 'electrode':list(),
                             'region':list()}

    anames = agg.acoustic_props + ['category']
    for aprop in anames:
        single_electrode_data['perf_%s' % aprop] = list()
        single_electrode_data['lkrat_%s' % aprop] = list()

    # initialize single cell dataset dictionary
    cell_data = {'bird':list(), 'block':list(), 'segment':list(), 'hemi':list(), 'electrode':list(),
                 'region':list(), 'cell_index':list()}
    for aprop in anames:
        cell_data['perf_%s' % aprop] = list()
        cell_data['lkrat_%s' % aprop] = list()

    nbands = len(freqs)
    i = agg.df.bird != 'BlaBro09xxF'
    g = agg.df[i].groupby(['bird', 'block', 'segment', 'hemi'])

    for (bird,block,segment,hemi),gdf in g:

        wtup = (bird,block,segment,hemi)
        index2electrode = agg.index2electrode[wtup]
        cell_index2electrode = agg.cell_index2electrode[wtup]

        # compute the number of cells, use it to compute significance thresholds for the likelihood ratios
        i = (gdf.e1 != -1) & (gdf.e1 == gdf.e2) & (gdf.cell_index != -1) & (gdf.decomp == 'spike_psd') & \
            (gdf.exel == False) & (gdf.exfreq == False) & (gdf.aprop == 'q2')
        ncells = i.sum()
        # print '%s,%s,%s,%s # of cells: %d' % (bird, block, segment, hemi, ncells)

        chi2_x = np.linspace(0, 100, 10000)

        # get the region by electrode
        electrode2region = dict()
        for e in index2electrode:
            i = (edata.bird == bird) & (edata.block == block) & (edata.hemisphere == hemi) & (edata.electrode == e)
            assert i.sum() == 1
            electrode2region[e] = clean_region(edata.region[i].values[0])

        # collect multi-electrode multi-band dataset
        band0_perfs = None
        for b in range(nbands+1):

            exfreq = b > 0

            perfs = dict()
            anames = agg.acoustic_props + ['category']
            for aprop in anames:
                for t,decomp in pair_list:
                    if decomp in no_lkrat_list and b > 0:
                        perfs['perf_%s_%s' % (aprop, t)] = 0
                        continue

                    # get multielectrode LFP decoder performance
                    i = (gdf.e1 == -1) & (gdf.e2 == -1) & (gdf.cell_index == -1) & (gdf.band == b) & (gdf.exfreq == exfreq) & \
                        (gdf.exel == False) & (gdf.aprop == aprop) & (gdf.decomp == decomp)

                    """
                    print '------------'
                    print 'decomp=%s' % decomp
                    print 'unique decomps:',gdf.decomp.unique()
                    iii = (gdf.e1 == -1) & (gdf.e2 == -1) & (gdf.cell_index == -1) & (gdf.band == b)
                    print 'iii.sum()=%d' % iii.sum()
                    iiii = (gdf.e1 == -1) & (gdf.e2 == -1) & (gdf.cell_index == -1) & (gdf.band == b) & (gdf.decomp == decomp)
                    print 'iiii.sum()=%d' % iiii.sum()
                    iiiii = (gdf.e1 == -1) & (gdf.e2 == -1) & (gdf.cell_index == -1) & (gdf.band == b) & (gdf.aprop == aprop)
                    print 'iiiii.sum()=%d' % iiiii.sum()
                    iiiiii = (gdf.e1 == -1) & (gdf.e2 == -1) & (gdf.cell_index == -1) & (gdf.band == b) & (gdf.aprop == aprop) & (gdf.decomp == decomp)
                    print 'iiiiii.sum()=%d' % iiiiii.sum()
                    """

                    if i.sum() != 1:
                        print 'len(gdf)=%d' % len(gdf)
                        print gdf
                        print "Zero or more than 1 result for (%s, %s, %s, %s), decomp=%s, band=%d, aprop=%s, exfreq=%d, exel=%d: i.sum()=%d" % (bird, block, segment, hemi, decomp, b, aprop, exfreq, False, i.sum())
                        return
                        continue

                    if aprop == 'category':
                        mperf = gdf.pcc[i].values[0]
                    else:
                        mperf = gdf.r2[i].values[0]
                    perfs['perf_%s_%s' % (aprop, t)] = mperf

                    lk = gdf.likelihood[i].values[0]
                    # if aprop == 'category':
                    #     nsamps = gdf.num_samps[i].values[0]
                    #     lk *= nsamps
                    perfs['lk_%s_%s' % (aprop, t)] = lk

                    eff_dof = gdf.effective_dof[i].values[0]
                    perfs['dof_%s_%s' % (aprop, t)] = eff_dof

                    if b == 0:
                        me_perf_data['bird'].append(bird)
                        me_perf_data['block'].append(block)
                        me_perf_data['segment'].append(segment)
                        me_perf_data['hemi'].append(hemi)
                        me_perf_data['perf'].append(mperf)
                        me_perf_data['aprop'].append(aprop)
                        me_perf_data['decomp'].append(decomp)

            multi_electrode_data['bird'].append(bird)
            multi_electrode_data['block'].append(block)
            multi_electrode_data['segment'].append(segment)
            multi_electrode_data['hemi'].append(hemi)
            multi_electrode_data['band'].append(b)
            for k,v in perfs.items():
                if k.startswith('perf'):
                    multi_electrode_data[k].append(v)

            if b == 0:
                band0_perfs = perfs
                for aprop in anames:
                    for t in ['lfp', 'spike']:
                        multi_electrode_data['lkrat_%s_%s' % (aprop,t)].append(0)
            else:
                # compute the likelihood ratio for each acoustic property on this band
                for aprop in anames:
                    for t in ['lfp', 'spike']:
                        # compute the likelihood ratio
                        full_likelihood = band0_perfs['lk_%s_%s' % (aprop, t)]
                        leave_one_out_likelihood = perfs['lk_%s_%s' % (aprop, t)]
                        lkrat = 2*(leave_one_out_likelihood - full_likelihood)

                        # grab the effective degrees of freedom and compute the chi2 significant threshold
                        full_dof = band0_perfs['dof_%s_%s' % (aprop, t)]
                        loo_dof = perfs['dof_%s_%s' % (aprop, t)]
                        if aprop != 'category':
                            if t == 'lfp':
                                dof = 16
                            elif t == 'spike':
                                dof = ncells
                            p = chi2.pdf(chi2_x, dof)
                            pi = p > 0.01
                            if pi.sum() == 0:
                                """
                                plt.figure()
                                plt.plot(chi2_x, p, 'k-')
                                plt.axis('tight')
                                plt.title('aprop=%s, full_dof=%0.2f, loo_dof=%0.2f' % (aprop, full_dof, loo_dof))
                                plt.show()
                                """
                                print '**** No thresh for aprop=%s, lkrat=%0.3f, full_dof=%0.2f, loo_dof=%0.2f' % (aprop, lkrat, full_dof, loo_dof)
                                sig_thresh = 1.
                            else:
                                sig_thresh = min(chi2_x[pi])
                            # print 'aprop=%s, t=%s, lkrat=%0.3f, full_dof=%0.2f, loo_dof=%0.2f, sig_thresh=%0.6f' % \
                            #       (aprop, t, lkrat, full_dof, loo_dof, sig_thresh)
                        else:
                            sig_thresh = 1.

                        lkrat /= sig_thresh
                        multi_electrode_data['lkrat_%s_%s' % (aprop, t)].append(lkrat)

        """
        # collect single electrode dataset
        for e in index2electrode:

            # get LFP performance data for this electrode, with and without leave-one-out (the variable "exel")
            perfs = dict()
            perfs_exel = dict()
            anames = agg.acoustic_props + ['category']
            for aprop in anames:
                for exel in [True, False]:
                    p = perfs
                    if exel:
                        p = perfs_exel
                    # get multielectrode LFP decoder performance
                    i = (gdf.e1 == e) & (gdf.e2 == e) & (gdf.cell_index == -1) & (gdf.band == 0) & (gdf.exfreq == False) & \
                        (gdf.exel == exel) & (gdf.aprop == aprop) & (gdf.decomp == 'locked')
                    assert i.sum() == 1, "Zero or more than 1 result for (%s, %s, %s, %s), decomp=locked, e=%d: i.sum()=%d" % (bird, block, segment, hemi, e, i.sum())
                    if aprop == 'category':
                        p['perf_%s' % aprop] = gdf.pcc[i].values[0]
                    else:
                        p['perf_%s' % aprop] = gdf.r2[i].values[0]

                    lk = gdf.likelihood[i].values[0]
                    if aprop == 'category':
                        nsamps = gdf.num_samps[i].values[0]
                        lk *= nsamps
                    p['lk_%s' % aprop] = lk

            # append the single electrode performances and likelihood ratios to the single electrode dataset
            single_electrode_data['bird'].append(bird)
            single_electrode_data['block'].append(block)
            single_electrode_data['segment'].append(segment)
            single_electrode_data['hemi'].append(hemi)
            single_electrode_data['electrode'].append(e)
            single_electrode_data['region].append(electrode2region[e])

            for aprop in anames:
                # append single electrode peformance
                single_electrode_data['perf_%s' % aprop].append(perfs['perf_%s' % aprop])
                # append likelihood ratio
                full_likelihood = band0_perfs['lk_%s_%s' % (aprop, 'lfp')]
                leave_one_out_likelihood = perfs_exel['lk_%s' % aprop]
                lkrat = 2*(leave_one_out_likelihood - full_likelihood)
                lkrat /= sig_thresh_electrode_or_cell_acoustic
                single_electrode_data['lkrat_%s' % aprop].append(lkrat)

        # collect single cell dataset
        for e in index2electrode:

            # count the number of cells and get their indices
            i = (gdf.e1 == e) & (gdf.e2 == e) & (gdf.cell_index != -1) & (gdf.band == 0) & (gdf.exfreq == False) & \
                    (gdf.exel == False) & (gdf.decomp == 'spike_psd')
            if i.sum() == 0:
                print 'No cells for (%s, %s, %s, %s), e=%d' % (bird, block, segment, hemi, e)
                continue

            cell_indices = sorted(gdf[i].cell_index.unique())
            for ci in cell_indices:

                missing_data = False
                # get cell performance data for this electrode, with and without leave-one-out (the variable "exel")
                perfs = dict()
                perfs_exel = dict()
                anames = agg.acoustic_props + ['category']
                for aprop in anames:
                    for exel in [True, False]:
                        p = perfs
                        if exel:
                            p = perfs_exel

                        # get multielectrode LFP decoder performance
                        i = (gdf.e1 == e) & (gdf.e2 == e) & (gdf.cell_index == ci) & (gdf.band == 0) & (gdf.exfreq == False) & \
                            (gdf.exel == exel) & (gdf.aprop == aprop) & (gdf.decomp == 'spike_psd')
                        if i.sum() == 0:
                            print "No result for (%s, %s, %s, %s), decomp=spike_psd, e=%d, ci=%d: i.sum()=%d" % (bird, block, segment, hemi, e, ci, i.sum())
                            missing_data = True
                            continue
                        if i.sum() > 1:
                            print "More than 1 result for (%s, %s, %s, %s), decomp=spike_psd, e=%d, ci=%d: i.sum()=%d" % (bird, block, segment, hemi, e, ci, i.sum())
                            missing_data = True
                            continue

                        if aprop == 'category':
                            p['perf_%s' % aprop] = gdf.pcc[i].values[0]
                        else:
                            p['perf_%s' % aprop] = gdf.r2[i].values[0]

                        lk = gdf.likelihood[i].values[0]
                        if aprop == 'category':
                            nsamps = gdf.num_samps[i].values[0]
                            lk *= nsamps
                        p['lk_%s' % aprop] = lk

                if missing_data:
                    print 'Skipping cell %d on electrode %d for (%s, %s, %s, %s)' % (ci, e, bird, block, segment, hemi)
                    continue

                # append the single electrode performances and likelihood ratios to the single electrode dataset
                cell_data['bird'].append(bird)
                cell_data['block'].append(block)
                cell_data['segment'].append(segment)
                cell_data['hemi'].append(hemi)
                cell_data['electrode'].append(e)
                cell_data['region'].append(electrode2region[e])
                cell_data['cell_index'].append(ci)

                for aprop in anames:
                    # append single electrode peformance
                    cell_data['perf_%s' % aprop].append(perfs['perf_%s' % aprop])
                    # append likelihood ratio
                    full_likelihood = band0_perfs['lk_%s_%s' % (aprop, 'spike')]
                    leave_one_out_likelihood = perfs_exel['lk_%s' % aprop]
                    lkrat = 2*(leave_one_out_likelihood - full_likelihood)
                    lkrat /= sig_thresh_electrode_or_cell_acoustic
                    cell_data['lkrat_%s' % aprop].append(lkrat)
        """

    df_me = pd.DataFrame(multi_electrode_data)
    df_me.to_csv(os.path.join(data_dir, 'aggregate', 'multi_electrode_perfs.csv'), index=False)

    df_me_perf = pd.DataFrame(me_perf_data)
    df_me_perf.to_csv(os.path.join(data_dir, 'aggregate', 'multi_electrode_perfs_for_glm.csv'), index=False)

    """
示例#8
0
def get_encoder_weights_squared(agg, decomp, data_dir='/auto/tdrive/mschachter/data'):
    """Collect normalized squared encoder weights and average them by frequency and region.

    For every (bird, block, segment, hemi) group of the rows of ``agg.df``
    whose ``decomp`` column equals *decomp*, each weight vector
    ``eweights[k, j, :]`` (electrode index k, frequency index j) is squared
    element-wise and divided by its own sum.

    Returns a tuple ``(wdf, Wsq, Wsq_by_freq, Wsq_by_reg)``:

    * ``wdf`` -- DataFrame with one row per electrode/frequency and the
      columns ``region``, ``freq``, ``xindex`` and ``eperf``; ``xindex`` is
      the row of ``Wsq`` holding that entry's weights.
    * ``Wsq`` -- array stacking all normalized squared weight vectors.
    * ``Wsq_by_freq`` -- array of shape (len(USED_ACOUSTIC_PROPS), len(freqs));
      column j is the mean of the ``Wsq`` rows at frequency j whose ``eperf``
      exceeds 0.05.
    * ``Wsq_by_reg`` -- array of shape (len(USED_ACOUSTIC_PROPS), 6); column j
      is the mean of all ``Wsq`` rows in the j-th region of
      ['L2', 'CMM', 'CML', 'L1', 'L3', 'NCM'] (no performance threshold).

    Raises AssertionError when no row matches *decomp*, when a group holds
    more than one row, or when an electrode does not match exactly one row of
    ``electrode_data.csv``.
    """
    freqs, _ = get_freqs_and_lags()

    decomp_rows = agg.df.decomp == decomp
    assert decomp_rows.sum() > 0
    sites = agg.df[decomp_rows].groupby(['bird', 'block', 'segment', 'hemi'])

    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data.csv'))

    # one list per output column, filled in lock-step with sq_weights
    region_col, freq_col, xindex_col, eperf_col = [], [], [], []
    sq_weights = []

    for (bird, block, seg, hemi), gdf in sites:

        assert len(gdf) == 1

        wkey = gdf['wkey'].values[0]
        # NOTE(review): this value is never used; the read is kept only so a
        # missing 'iindex' column still fails here exactly as it did before.
        _unused_iindex = gdf['iindex'].values[0]

        eperf = agg.encoder_perfs[wkey]
        eweights = agg.encoder_weights[wkey]

        for k, e in enumerate(agg.index2electrode[wkey]):

            match = ((edata.bird == bird) & (edata.block == block)
                     & (edata.hemisphere == hemi) & (edata.electrode == e))
            assert match.sum() == 1
            reg = clean_region(edata[match].region.values[0])

            for j, f in enumerate(freqs):
                w = eweights[k, j, :]

                eperf_col.append(eperf[k, j])
                region_col.append(reg)
                freq_col.append(int(f))
                xindex_col.append(len(sq_weights))

                # squaring makes a new array, so the stored weights are untouched
                normalized = w**2
                normalized /= normalized.sum()
                sq_weights.append(normalized)

    wdf = pd.DataFrame({'region': region_col, 'freq': freq_col,
                        'xindex': xindex_col, 'eperf': eperf_col})
    Wsq = np.array(sq_weights)

    def mean_weights(selector):
        # average the Wsq rows referenced by the selected wdf entries
        picked = wdf.xindex[selector].values
        return Wsq[picked, :].mean(axis=0)

    # average by frequency, keeping only entries above the performance threshold
    r2_thresh = 0.05
    Wsq_by_freq = np.zeros([len(USED_ACOUSTIC_PROPS), len(freqs)])
    for j, f in enumerate(freqs):
        Wsq_by_freq[:, j] = mean_weights((wdf.freq == int(f)) & (wdf.eperf > r2_thresh))

    # average by region, over all entries of that region
    regs = ['L2', 'CMM', 'CML', 'L1', 'L3', 'NCM']
    Wsq_by_reg = np.zeros([len(USED_ACOUSTIC_PROPS), len(regs)])
    for j, reg in enumerate(regs):
        Wsq_by_reg[:, j] = mean_weights(wdf.region == reg)

    return wdf, Wsq, Wsq_by_freq, Wsq_by_reg
示例#9
0
def export_pairwise_encoder_datasets_for_glm(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Export pairwise encoder performances and weights as two flat CSV files.

    Uses the rows of ``agg.df`` whose ``decomp`` is 'self+cross_locked'. For
    every (bird, block, segment, hemi) site, every electrode pair (k, j) with
    j < k and every lag, one row is written to
    ``<data_dir>/aggregate/pairwise_encoder_perfs_for_glm.csv`` holding
    ``r2 = eperf[k, j, li]``, and one row per acoustic property in
    USED_ACOUSTIC_PROPS is written to
    ``<data_dir>/aggregate/pairwise_encoder_weights_for_glm.csv`` holding
    ``w = eweights[k, j, li, ai]``.

    The weights of each site are divided by that site's largest absolute
    weight. ``dist`` is the Euclidean distance between the two electrodes'
    (dist_midline, dist_l2a) coordinates read from ``electrode_data+dist.csv``.
    ``regions`` is 'reg2->reg1' for negative lags and 'reg1->reg2' otherwise.

    Raises AssertionError when a site holds more than one row or an electrode
    does not match exactly one row of the electrode table.
    """
    _, lags = get_freqs_and_lags()

    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data+dist.csv'))

    data = {'bird':list(), 'block':list(), 'segment':list(), 'hemi':list(),
            'electrode1':list(), 'electrode2':list(), 'regions':list(),
            'site':list(), 'lag':list(), 'r2':list(), 'dist':list()}

    weight_data = {'bird':list(), 'block':list(), 'segment':list(), 'hemi':list(),
                   'electrode1':list(), 'electrode2':list(), 'regions':list(),
                   'site':list(), 'lag':list(), 'aprop':list(), 'w':list(), 'dist':list()}

    decomp = 'self+cross_locked'
    i = agg.df.decomp == decomp

    g = agg.df[i].groupby(['bird', 'block', 'segment', 'hemi'])
    for (bird,block,seg,hemi),gdf in g:

        assert len(gdf) == 1

        wkey = gdf['wkey'].values[0]
        index2electrode = agg.index2electrode[wkey]

        eperf = agg.encoder_perfs[wkey]
        raw_weights = agg.encoder_weights[wkey]
        # Normalize into a NEW array: an in-place `/=` would rescale the
        # weights stored on the aggregator for every later consumer.
        eweights = raw_weights / np.abs(raw_weights).max()

        site = '%s_%s_%s_%s' % (bird, block, seg, hemi)

        # Look up region and anatomical location once per electrode instead of
        # once per electrode pair; entries are aligned with index2electrode.
        site_edata = edata[(edata.bird == bird) & (edata.block == block) & (edata.hemisphere == hemi)]
        electrode_info = list()
        for e in index2electrode:
            erows = site_edata[site_edata.electrode == e]
            assert len(erows) == 1, \
                "expected exactly one electrode_data row for electrode %s at site %s, found %d" % (e, site, len(erows))
            reg = clean_region(erows.region.values[0])
            eloc = np.array([erows.dist_midline.values[0], erows.dist_l2a.values[0]])
            electrode_info.append((reg, eloc))

        for k,e1 in enumerate(index2electrode):
            reg1,eloc1 = electrode_info[k]

            for j in range(k):
                e2 = index2electrode[j]
                reg2,eloc2 = electrode_info[j]

                # compute the distance between electrodes in anatomical coordinates
                edist = np.linalg.norm(eloc1 - eloc2)

                for li,lag in enumerate(lags):

                    r2 = eperf[k, j, li]

                    if lag < 0:
                        regs = '%s->%s' % (reg2, reg1)
                    else:
                        regs = '%s->%s' % (reg1, reg2)

                    # fields shared by the performance row and every weight row
                    shared = (('bird', bird), ('block', block), ('segment', seg), ('hemi', hemi),
                              ('electrode1', e1), ('electrode2', e2), ('regions', regs),
                              ('site', site), ('lag', int(lag)), ('dist', edist))

                    for key,val in shared:
                        data[key].append(val)
                    data['r2'].append(r2)

                    for ai,aprop in enumerate(USED_ACOUSTIC_PROPS):
                        for key,val in shared:
                            weight_data[key].append(val)
                        weight_data['aprop'].append(aprop)
                        weight_data['w'].append(eweights[k, j, li, ai])

    df = pd.DataFrame(data)
    df.to_csv(os.path.join(data_dir, 'aggregate', 'pairwise_encoder_perfs_for_glm.csv'), header=True, index=False)

    wdf = pd.DataFrame(weight_data)
    wdf.to_csv(os.path.join(data_dir, 'aggregate', 'pairwise_encoder_weights_for_glm.csv'), header=True, index=False)
示例#10
0
def export_psd_encoder_datasets_for_glm(agg, data_dir='/auto/tdrive/mschachter/data'):
    """Export encoder performances for the PSD and spike-rate decompositions to one CSV.

    Writes ``<data_dir>/aggregate/encoder_perfs_for_glm.csv`` with the columns
    bird, block, segment, hemi, electrode, cell_index, region, site, freq, r2.

    * For the 'full_psds' decomposition there is one row per electrode and
      frequency, with ``r2 = eperf[k, j]`` and ``cell_index`` set to -1.
    * For the 'spike_rate' decomposition there is one row per entry of
      ``agg.cell_index2electrode[wkey]``, with ``r2 = eperf[ci]`` and ``freq``
      set to -1.

    Regions come from ``<data_dir>/aggregate/electrode_data.csv`` and are
    passed through ``clean_region``.

    Raises AssertionError when *agg* is not an AcousticEncoderDecoderAggregator,
    when a site holds more than one row, or when an electrode does not match
    exactly one row of the electrode table.
    """
    freqs, _ = get_freqs_and_lags()

    edata = pd.read_csv(os.path.join(data_dir, 'aggregate', 'electrode_data.csv'))

    data = {'bird':list(), 'block':list(), 'segment':list(), 'hemi':list(), 'electrode':list(), 'cell_index':list(),
            'region':list(), 'site':list(), 'freq':list(), 'r2':list()}

    decomps = ['full_psds', 'spike_rate']

    assert isinstance(agg, AcousticEncoderDecoderAggregator)

    def region_of(site_edata, e, site):
        # region of electrode e, which must match exactly one row of the site's table
        hits = site_edata.region[site_edata.electrode == e]
        assert len(hits) == 1, \
            "expected exactly one electrode_data row for electrode %s at site %s, found %d" % (e, site, len(hits))
        return clean_region(hits.values[0])

    def add_row(bird, block, seg, hemi, site, e, reg, cell_index, freq, r2):
        # append one output row; both decompositions share the same columns
        data['bird'].append(bird)
        data['block'].append(block)
        data['segment'].append(seg)
        data['hemi'].append(hemi)
        data['electrode'].append(e)
        data['region'].append(reg)
        data['cell_index'].append(cell_index)
        data['site'].append(site)
        data['freq'].append(freq)
        data['r2'].append(r2)

    for decomp in decomps:

        i = agg.df.decomp == decomp
        g = agg.df[i].groupby(['bird', 'block', 'segment', 'hemi'])
        for (bird,block,seg,hemi),gdf in g:

            assert len(gdf) == 1

            wkey = gdf['wkey'].values[0]
            eperf = agg.encoder_perfs[wkey]
            index2electrode = agg.index2electrode[wkey]
            cell_index2electrode = agg.cell_index2electrode[wkey]

            site = '%s_%s_%s_%s' % (bird, block, seg, hemi)

            # the bird/block/hemisphere filter is the same for every electrode of the site
            site_edata = edata[(edata.bird == bird) & (edata.block == block) & (edata.hemisphere == hemi)]

            if decomp.endswith('psds'):
                for k,e in enumerate(index2electrode):
                    # the region depends only on the electrode, so resolve it
                    # once here rather than once per frequency
                    reg = region_of(site_edata, e, site)
                    for j,f in enumerate(freqs):
                        add_row(bird, block, seg, hemi, site, e, reg, -1, int(f), eperf[k, j])

            elif decomp == 'spike_rate':
                for ci,e in enumerate(cell_index2electrode):
                    reg = region_of(site_edata, e, site)
                    add_row(bird, block, seg, hemi, site, e, reg, ci, -1, eperf[ci])

    df = pd.DataFrame(data)
    df.to_csv(os.path.join(data_dir, 'aggregate', 'encoder_perfs_for_glm.csv'), header=True, index=False)