示例#1
0
 def _make_figure_title(self,
                        index,
                        h,
                        condn,
                        phase,
                        events,
                        test,
                        randomized=False,
                        paren=''):
     """Build the multi-line title string for a ROC-curve figure.

     Arguments:
     index -- scan index name, converted for display with snake2title
     h -- half-window value, or an iterable of them; an iterable with more
         than 10 entries is abbreviated to its first and last three
     condn -- scan-cell filter string; its line is added only when truthy
     phase -- scan phase name; its line is added unless it equals 'scan'
     events -- event table name, converted for display with snake2title
     test -- comparison operator key: 'ge', 'gt', 'le' or 'lt' (any other
         value raises KeyError)
     randomized -- prepend a '[Randomized] ' tag to the first line
     paren -- optional text inserted right after 'ROC Curves'

     Returns the title as a single newline-separated string.
     """
     operator_symbols = {'ge': ">=", 'gt': ">", 'le': "<=", 'lt': "<"}

     prefix = paren + ' ' if paren else ''
     events_label = snake2title(events)
     index_label = snake2title(index)
     headline = 'ROC Curves %s: %s Events for %s %s Criterion' % (
         prefix, events_label, index_label, operator_symbols[test])
     if randomized:
         headline = '[Randomized] ' + headline
     lines = [headline]

     # Long sweeps are shown as "[a, b, c, ..., x, y, z]" to keep the title
     # readable.
     if np.iterable(h) and len(h) > 10:
         leading = ', '.join(str(value) for value in h[:3])
         trailing = ', '.join(str(value) for value in h[-3:])
         window_text = '[%s, ..., %s]' % (leading, trailing)
     else:
         window_text = str(h)
     lines.append('Half-Window = %s' % window_text)

     if condn:
         lines.append('Scan-Cell Filter: "%s"' % condn)
     if phase != 'scan':
         lines.append('Scan Phase: %s' % phase.title())

     return '\n'.join(lines)
示例#2
0
    def compute_ranksum_statistics(self, randomize=False, condn=None):
        """Log a Mann-Whitney U test of every scan index against hit outcomes.

        For each name in ``ScanIndex.ActiveNames`` and each half-window found
        in the ``hit_outcome`` table, the scan-index values of the rows with
        a negative outcome are compared against those with a positive outcome
        using ``st.mannwhitneyu``; U and p are written to the log.

        Arguments:
        randomize -- shuffle the scan-index values before testing; the log
            file is then named 'ranksum-randomized' instead of 'ranksum'
        condn -- optional condition forwarded to ``self._column_data``

        The log file and the data file are closed even if a step fails.
        """
        fn = 'ranksum-randomized' if randomize else 'ranksum'
        self.start_logfile(fn)
        data_file = self.get_data_file()

        try:
            scan_cell_table = data_file.root.scan_cell_info
            outcome_table = data_file.root.hit_outcome

            h_to_colname = self._find_half_windows(outcome_table)
            h_windows = sorted(h_to_colname.keys())

            for index_name in ScanIndex.ActiveNames:
                scan_index = self._column_data(scan_cell_table, index_name,
                                               scan_cell_table, condn)

                if randomize:
                    scan_index = np.random.permutation(scan_index)

                for h in h_windows:
                    # NOTE(review): used as a mask below, so this is
                    # presumably a boolean array -- confirm in _column_data.
                    outcomes = self._column_data(outcome_table,
                                                 h_to_colname[h],
                                                 scan_cell_table, condn)

                    negatives = np.logical_not(outcomes)
                    positives = outcomes

                    U, p = st.mannwhitneyu(scan_index[negatives],
                                           scan_index[positives])

                    self.out('%s index, half-window %d: U = %d, p < %f' %
                             (snake2title(index_name), h, U, p))

            self.out(
                'Note p-values are one-sided, multiply by 2 for two-sided.')
        finally:
            # Same closing order as before, but now also on failure.
            self.close_logfile()
            self.close_data_file()
示例#3
0
 def _dump_page(self):
     """Title the current page, save it as a PDF and clear the figure.

     The file is written to ``self.savedir`` as
     ``<save_file_stem>_<pageno, zero-padded to 3 digits>.pdf``. Its path is
     logged and appended to ``self.save_file_list``, and ``self.pageno`` is
     incremented for the next page.
     """
     page_filename = '%s_%03d.pdf' % (self.save_file_stem, self.pageno)
     savefile = os.path.join(self.savedir, page_filename)

     # Figure title: the display form of the file stem, plus the optional
     # description.
     title_parts = [snake2title(self.save_file_stem)]
     if self.desc:
         title_parts.append('%s' % self.desc.title())
     self.f.suptitle(' - '.join(title_parts))

     # Page number centered near the bottom edge of the figure.
     self.f.text(0.5, 0.04, 'Page %d' % self.pageno,
                 size='small', ha='center')

     plt.savefig(savefile)
     self.out('Saved file:\n\t%s' % savefile)
     self.save_file_list.append(savefile)

     self.pageno += 1
     plt.clf()
示例#4
0
 def wrapper(*args, **kwargs):
     """Log the wrapped function's display name, then call it unchanged."""
     heading = snake2title(f.__name__)
     self.out(heading)
     return f(*args, **kwargs)
示例#5
0
    def process_data(self, bins=16):
        """Plot theta power and frequency against binned velocity moments.

        For every moment in ``self.results['velocity_moments']`` the range
        returned by ``CI(..., alpha=0.02)`` is split into `bins` equal-width
        bins. For each rat and bin, the median power and median frequency of
        the matching ``theta_velocity`` rows are computed. If both
        ``P_v_<bins>.npy`` and ``f_v_<bins>.npy`` already exist in
        ``self.datadir`` they are loaded instead (the cache is keyed on
        `bins` only). The across-rat mean with a 1.96 * SEM band is drawn
        for the first two moments, and a ``friedman_str`` summary per panel
        is written to ``stats_<bins>.log``.

        Arguments:
        bins -- number of velocity bins per moment (default 16)

        The data file and the log file are closed even if a step fails.
        """

        def compute_error(M):
            # 1.96 * standard error of the mean across the first axis.
            return 1.96 * M.std(axis=0) / np.sqrt(M.shape[0])

        def get_errlim(mu, err, factor=0.1):
            # y-limits enclosing mu +/- err, padded by `factor` of the span.
            # Uses the `err` argument; it used to read the caller's `sem`
            # through the closure and ignore the parameter.
            errmin, errmax = (mu - err).min(), (mu + err).max()
            derr = errmax - errmin
            return errmin - (factor / 2) * derr, errmax + (factor / 2) * derr

        def generate_velocity_modulation_curves(results, rat_list,
                                                velocity_moments,
                                                velocity_bins):
            # Median power/frequency per (moment, rat, velocity bin).
            self.out('Generating velocity modulation curves...')
            P_v = np.empty((len(velocity_moments), len(rat_list), bins), 'd')
            f_v = np.empty_like(P_v)

            power = results.col('power')
            frequency = results.col('frequency')

            for i, moment in enumerate(velocity_moments):
                self.out('Computing %s...' % moment)
                edges = velocity_bins[moment]

                for j, rat in enumerate(rat_list):
                    self.out.printf('[%d] ' % rat, color='lightgreen')

                    for k in range(bins):
                        v_lo, v_hi = edges[k:k + 2]

                        # Bins are half-open: [v_lo, v_hi).
                        query = '(rat==%d)&(%s>=%f)&(%s<%f)' % (
                            rat, moment, v_lo, moment, v_hi)
                        ix = results.getWhereList(query)

                        # np.median of an empty selection yields NaN.
                        P_v[i, j, k] = np.median(power[ix])
                        f_v[i, j, k] = np.median(frequency[ix])

                self.out.printf('\n')
            return P_v, f_v

        self.out.outfd = open(
            os.path.join(self.datadir, 'stats_%d.log' % bins), 'w')
        try:
            self.out.timestamp = False

            data_file = self.get_data_file()
            try:
                results = data_file.root.theta_velocity
                rat_list = unique_rats(results)
                velocity_moments = self.results['velocity_moments']

                # Bin edges per moment, spanning the CI-derived range.
                velocity_bins = {}
                for moment in velocity_moments:
                    lo, hi = CI(results.col(moment), alpha=0.02)
                    self.out('%s: range %f to %f' % (moment, lo, hi))
                    velocity_bins[moment] = np.linspace(lo, hi, bins + 1)

                P_v_fn = os.path.join(self.datadir, 'P_v_%d.npy' % bins)
                f_v_fn = os.path.join(self.datadir, 'f_v_%d.npy' % bins)
                if os.path.exists(P_v_fn) and os.path.exists(f_v_fn):
                    self.out('Loading previous velocity modulation data...')
                    P_v, f_v = np.load(P_v_fn), np.load(f_v_fn)
                else:
                    P_v, f_v = generate_velocity_modulation_curves(
                        results, rat_list, velocity_moments, velocity_bins)
                    np.save(P_v_fn, P_v)
                    np.save(f_v_fn, f_v)

                plt.ioff()
                if not isinstance(self.figure, dict):
                    self.figure = {}
                self.figure['velocity_modulation_curves'] = f = plt.figure(
                    num=20, figsize=(9, 10))
                plt.clf()
                f.suptitle('Theta (Z-)Power, Frequency - Velocity Modulation')

                N_moments = len(velocity_moments)
                line_fmt = dict(ls='-', c='k', lw=2)
                shade_fmt = dict(ec='none', alpha=0.3, fill=True, fc='k',
                                 zorder=-1)
                labels = ('Z-Power', 'Frequency')

                # Only the first two moments are plotted, one row each.
                for i, moment in enumerate(velocity_moments[:2]):
                    edges = velocity_bins[moment]
                    centers = (edges[1:] + edges[:-1]) / 2

                    for j, X_v in enumerate([P_v, f_v]):
                        ax = f.add_subplot(N_moments, 2, i * 2 + j + 1)
                        mu = X_v[i].mean(axis=0)
                        sem = compute_error(X_v[i])

                        ax.plot(centers, mu, **line_fmt)
                        shaded_error(centers, mu, sem, ax=ax, **shade_fmt)

                        self.out('%s: %s %s' % (moment.title(), labels[j],
                                                friedman_str(X_v[i])))

                        if i == 0:
                            quicktitle(ax, labels[j], size='x-small')

                        ax.set_xlabel(snake2title(moment))
                        ax.set_xlim(centers[0], centers[-1])
                        ax.set_ylim(get_errlim(mu, sem))

                plt.ion()
                plt.show()
            finally:
                self.close_data_file()
        finally:
            self.out.outfd.close()
示例#6
0
    def process_data(self):
        """Create figure plotting distributions of scan-firing measures and
        compute various relevant statistics.

        For every field of the LEC and MEC result arrays
        (``self.results['LEC']`` / ``self.results['MEC']``) one subplot shows
        the two smoothed distributions with dashed lines at their medians.
        Median, mean +/- SEM, sample size, a two-sample t-test and a KS test
        are written to ``figure.log``. Fields whose name ends in 'norm' are
        additionally tested with ``t_one_tailed``. Panels are marked '*t' /
        '*KS' when the respective p-value is below 0.05.

        Side effects: changes the working directory to ``self.datadir`` and
        replaces ``self.figure``. The log file is closed even if a step
        fails.
        """
        from scanr.tools.stats import smooth_pdf, t_one_tailed
        from scanr.tools.string import snake2title
        from scipy.stats import ttest_ind as ttest, ks_2samp as kstest

        def mean_pm_sem(a):
            return '%.4f +/- %.4f' % (a.mean(), a.std() / np.sqrt(a.size))

        os.chdir(self.datadir)
        self.out.outfd = open('figure.log', 'w')
        try:
            self.out.timestamp = False

            self.figure = {}
            self.figure['distros'] = f = plt.figure(figsize=(10, 16))
            f.suptitle('Distributions of LEC/MEC Scan/Non-scan Firing')

            LEC = self.results['LEC']
            MEC = self.results['MEC']
            # (label, data, plot color); LEC is always handled before MEC.
            areas = (('LEC', LEC, 'b'), ('MEC', MEC, 'g'))

            data_types = LEC.dtype.names
            N = len(data_types)

            plt.ioff()
            for i, data in enumerate(data_types):
                ax = plt.subplot(N, 1, i + 1)
                label = snake2title(data)

                for area, values, color in areas:
                    ax.plot(*smooth_pdf(values[data]),
                            lw=2, aa=True, label=area, c=color)

                # Leave 10% headroom above the tallest curve.
                ax.axis('tight')
                v = list(ax.axis())
                v[3] *= 1.1
                ax.axis(v)

                medians = {}
                for area, values, color in areas:
                    medians[area] = np.median(values[data])
                for area, values, color in areas:
                    ax.plot([medians[area]] * 2, [v[2], v[3]], color + '--')

                self.out(label.center(50, '-'))
                for area, values, color in areas:
                    sample = values[data]
                    self.out('Median %s(%s) = %.4f' %
                             (area, label, medians[area]))
                    self.out('Mean/SEM %s(%s) = %s' %
                             (area, label, mean_pm_sem(sample)))
                    if data.endswith('norm'):
                        self.out('T(%s > 1) = %.4f, p = %.8f' %
                                 ((area,) + tuple(t_one_tailed(sample))))
                        # NOTE(review): "cstv" presumably means a
                        # conservative test with the degrees of freedom cut
                        # to size/5 - 1 -- confirm the rationale for 5.
                        self.out('T_cstv(%s > 1) = %.4f, p = %.8f' %
                                 ((area,) + tuple(t_one_tailed(
                                     sample, df=sample.size / float(5) - 1))))
                    self.out('N %s cell-sessions = %d' % (area, sample.size))

                t = ttest(LEC[data], MEC[data])
                k = kstest(LEC[data], MEC[data])
                self.out('T-test(%s): t = %.4f, p = %.8f' %
                         (label, t[0], t[1]))
                self.out('KS-test(%s): D = %.4f, p = %.8f' %
                         (label, k[0], k[1]))
                if t[1] < 0.05:
                    ax.text(0.025, 0.8, '*t', size='x-large',
                            transform=ax.transAxes)
                if k[1] < 0.05:
                    ax.text(0.025, 0.6, '*KS', size='x-large',
                            transform=ax.transAxes)

                ax.set_ylabel('p[ %s ]' % label)

                if i == 0:
                    ax.legend(loc=1)

            plt.ion()
            plt.show()
        finally:
            self.out.outfd.close()
示例#7
0
    def bootstrap_overall_fractions(self,
                                    first_day=False,
                                    shuffles=1000,
                                    ymax=0.6):
        """Bootstrap and plot the fraction of place fields with events.

        Note: this is old processing code, before the proper within-rat
        analysis was implemented.

        Four sets of fractions are computed from the ``place_fields`` table:
        double-rotation (DR) by maze type and by session number, and novelty
        (Nov) by maze type and by session number. Each set is an array of
        shape (shuffles + 1, n_groups). Row 0 holds the observed fraction of
        rows with ``events > 0`` and the remaining rows hold bootstrap
        resamples drawn with replacement. Each array is cached as
        ``<name><suffix>.npy`` in ``self.datadir`` and loaded instead of
        recomputed when the file exists. The results are drawn as bar charts
        with empirical confidence intervals, and as box plots.

        Arguments:
        first_day -- restrict to ``day==1`` (otherwise ``day!=0``); also adds
            a '_day1' suffix to cache and figure names
        shuffles -- number of bootstrap resamples
        ymax -- upper y-limit of all four panels

        The data file and the log file are closed even if a step fails.
        """
        if first_day:
            daystr = '(day==1)&'
            suffix = '_day1'
            daylabel = 'First Day '
        else:
            daystr = '(day!=0)&'
            suffix = ''
            daylabel = ''

        def bootstrap_fractions(field_data, stem, description, groups):
            # Load the cached array for `stem`, or bootstrap one column per
            # (heading, query) pair in `groups` and cache the result.
            path = os.path.join(self.datadir, '%s%s.npy' % (stem, suffix))
            if os.path.exists(path):
                self.out('Loading: %s' % description)
                return np.load(path)

            self.out('Computing: %s' % description)
            F = np.zeros((shuffles + 1, len(groups)), 'd')
            for j, (heading, query) in enumerate(groups):
                self.out(heading)
                I = field_data.getWhereList(daystr + query)
                N = len(I)
                if not N:
                    continue  # no matching rows: column stays at zero

                # Read each selected row once instead of once per resample.
                has_events = np.array(
                    [field_data[ix]['events'] > 0 for ix in I], dtype=bool)

                # Row 0 is the identity (observed data); the rest resample
                # with replacement. randint's upper bound is exclusive, so
                # this matches the deprecated random_integers(0, N - 1).
                B = np.vstack((np.arange(N),
                               np.random.randint(0, N, size=(shuffles, N))))
                for i in range(shuffles + 1):
                    self.out.printf('.')
                    # float() keeps this a true fraction even without
                    # `from __future__ import division` on Python 2.
                    F[i, j] = has_events[B[i]].sum() / float(N)
                self.out.printf('\n')

            np.save(path, F)
            return F

        def error(F, alpha=0.05):
            e = np.empty((2, F.shape[1]), 'd')
            for i, vals in enumerate(F[1:].T):
                e[:, i] = CI(vals,
                             alpha=alpha)  # empirical confidence interval
            e -= F[0]  # errorbar yerr weirdness, mean - row1 to mean + row2
            e[0] *= -1  # must invert to get appropriate behavior!!!
            return e

        def type_groups(maze_types):
            return [('Type = %s' % mtype,
                     '(session!=1)&(type=="%s")' % mtype)
                    for mtype in maze_types]

        def session_groups(expt_type, sessions):
            return [('Session = %d' % number,
                     '(expt_type=="%s")&(session==%d)' % (expt_type, number))
                    for number in sessions]

        self.out.outfd = open(os.path.join(self.datadir, 'figure.log'), 'w')
        try:
            self.out.timestamp = False

            data_file = self.get_data_file()
            try:
                field_data = data_file.root.place_fields
                self.out("Found %d rows of place field data." %
                         field_data.nrows)

                F_DR_type = bootstrap_fractions(
                    field_data, 'F_DR_type', 'DR, across maze type',
                    type_groups(('STD', 'MIS')))
                F_DR_num = bootstrap_fractions(
                    field_data, 'F_DR_num', 'DR, across maze number',
                    session_groups('DR', range(1, 6)))
                F_nov_type = bootstrap_fractions(
                    field_data, 'F_nov_type', 'Nov, across maze type',
                    type_groups(('FAM', 'NOV')))
                F_nov_num = bootstrap_fractions(
                    field_data, 'F_nov_num', 'Nov, across maze number',
                    session_groups('NOV', range(1, 4)))
            finally:
                self.close_data_file()

            # Bar charts, maze number and maze type
            if not isinstance(self.figure, dict):
                self.figure = {}
            self.figure['field_stats%s' % suffix] = f = plt.figure(
                figsize=(10, 7))
            f.suptitle('Prevalence Fraction of %sEvents: %s' %
                       (daylabel, snake2title(self.results['table_name'])))

            fmt = dict(width=0.3,
                       linewidth=0,
                       color='0.4',
                       edgecolor='none',
                       ecolor='k',
                       capsize=0)

            # Columns: DR groups first, then Nov groups. Row 0 is observed.
            by_type = np.c_[F_DR_type, F_nov_type]
            by_number = np.c_[F_DR_num, F_nov_num]

            ax = f.add_subplot(221)
            ax.bar([0, 0.4, 1, 1.4], by_type[0], yerr=error(by_type), **fmt)
            ax.set_xlim(-0.3, 3.7)
            ax.set_ylim(0, ymax)
            ax.set(xticks=[], xticklabels=[])
            ax.tick_params(right=False, direction='out')
            ax.axhline(1.0, c='k', ls='--', lw=1)

            ax = f.add_subplot(222)
            ax.boxplot(list(by_type.T))
            ax.set_ylim(0, ymax)

            ax = f.add_subplot(223)
            ax.bar([0, 0.4, 0.8, 1.2, 1.6, 2.4, 2.8, 3.2],
                   by_number[0],
                   yerr=error(by_number),
                   **fmt)
            ax.set_xlim(-0.3, 5.1)
            ax.set_ylim(0, ymax)
            ax.set(xticks=[], xticklabels=[])
            ax.tick_params(right=False, direction='out')
            ax.axhline(1.0, c='k', ls='--', lw=1)

            ax = f.add_subplot(224)
            ax.boxplot(list(by_number.T))
            ax.set_ylim(0, ymax)
        finally:
            self.out.outfd.close()
示例#8
0
    def plot_roc_curves(self, ylim=(0.5, 1.0), skinny_sweep_ax=False):
        """Plot the stored ROC curves and their AUC across half-windows.

        For every name in ``ScanIndex.ActiveNames`` a figure is created with
        the ROC curves of all half-windows in the left panel and the AUC as a
        function of half-window in a second panel. Each AUC is written to
        the 'roc_auc' log, and the AUC vector is stored under ``/roc_curves``
        in the data file via ``create_array``.

        Arguments:
        ylim -- y-axis limits of the AUC panel
        skinny_sweep_ax -- place the AUC panel at subplot (2, 5, 10) instead
            of the default 224 position

        The log file and the data file are closed even if a step fails.
        """
        self.start_logfile('roc_auc')
        data_file = self.get_data_file(mode='a')

        try:
            roc_group = data_file.root.roc_curves
            outcome_table = data_file.root.hit_outcome

            scan_phase = data_file.root.scan_cell_info._v_attrs['scan_phase']
            roc_attrs = roc_group._v_attrs
            scan_cell_condn = roc_attrs['condn']
            is_randomized = roc_attrs['randomize']
            test = roc_attrs['test_operator']
            event_table = roc_attrs['event_table']

            h_to_colname = self._find_half_windows(outcome_table)
            h_windows = sorted(h_to_colname.keys())

            self.close_figures()
            plt.ioff()
            if not isinstance(self.figure, dict):
                self.figure = {}

            fmt = dict(lw=2, ls='-')
            ndfmt = dict(c='0.4', ls='--', lw=1.5, zorder=-1)
            colors = plt.cm.coolwarm(np.linspace(0, 1, len(h_windows)))

            # Label only the first and last tick. The list is built at the
            # same length as the tick list so that a single half-window does
            # not produce two labels for one tick.
            tick_labels = [''] * len(h_windows)
            tick_labels[0] = h_windows[0]
            tick_labels[-1] = h_windows[-1]

            for index_name in ScanIndex.ActiveNames:

                f = self.new_figure('%s_ROC_curves' % index_name,
                                    self._make_figure_title(
                                        index_name,
                                        h_windows,
                                        scan_cell_condn,
                                        scan_phase,
                                        event_table,
                                        test,
                                        randomized=is_randomized),
                                    figsize=(11, 8))

                ax_roc = f.add_subplot(121)
                if skinny_sweep_ax:
                    ax_auc = f.add_subplot(2, 5, 10)
                else:
                    ax_auc = f.add_subplot(224)

                AUC = np.empty(len(h_windows), 'd')
                # Diagonal reference line in the ROC panel.
                ax_roc.plot([0, 1], [0, 1], **ndfmt)

                for i, h in enumerate(h_windows):
                    FPR, TPR = data_file.getNode(
                        roc_group, self._roc_array_name(index_name, h)).read()
                    AUC[i] = self._compute_AUC(TPR, FPR, test)
                    # Smaller half-windows are drawn on top of larger ones.
                    ax_roc.plot(FPR,
                                TPR,
                                label=str(h),
                                c=colors[i],
                                zorder=len(h_windows) - i,
                                **fmt)
                    self.out('%s index, window %d: AUC = %f' %
                             (snake2title(index_name), h, AUC[i]))

                arr = create_array(
                    data_file,
                    '/roc_curves',
                    self._auc_array_name(index_name),
                    AUC,
                    title='ROC-AUC Across Half-Windows for %s Index' %
                    index_name)
                self.out('Saved %s.' % arr._v_pathname)

                # A legend is only shown for 10 or fewer curves.
                if len(h_windows) < 11:
                    ax_roc.legend(loc='lower right')

                ax_roc.set(xlim=(0, 1), ylim=(0, 1))
                ax_roc.set_ylabel('TPR')
                ax_roc.set_xlabel('FPR')
                ax_roc.axis('scaled')

                # Horizontal reference line at AUC = 0.5.
                ax_auc.axhline(0.5, **ndfmt)
                ax_auc.plot(h_windows,
                            AUC,
                            '-',
                            c='steelblue',
                            lw=3,
                            solid_capstyle='round')
                ax_auc.set(xlabel='Half-Window, degrees',
                           ylabel="AUC",
                           xlim=(0, h_windows[-1] + h_windows[0]),
                           ylim=ylim,
                           xticks=h_windows,
                           xticklabels=tick_labels)

            plt.ion()
            plt.show()
        finally:
            # Same closing order as before, but now also on failure.
            self.close_logfile()
            self.close_data_file()