def _make_figure_title(self, index, h, condn, phase, events, test,
                       randomized=False, paren=''):
    """Assemble the multi-line title string for an ROC-curve figure.

    Arguments:
    index -- scan index name, title-cased via snake2title
    h -- half-window value, or a collection of them; a collection with
        more than 10 entries is abbreviated to its first and last three
    condn -- scan-cell filter string; its line is added only if truthy
    phase -- scan phase name; its line is added unless it equals 'scan'
    events -- event table name, title-cased via snake2title
    test -- comparison key, one of 'ge', 'gt', 'le', 'lt'

    Keyword arguments:
    randomized -- prefix the first line with '[Randomized] '
    paren -- optional text inserted right after 'ROC Curves'

    Returns the title string. Raises KeyError for an unknown test key.
    """
    prefix = paren + ' ' if paren else ''
    events_title = snake2title(events)
    index_title = snake2title(index)
    criterion = {'ge': ">=", 'gt': ">", 'le': "<=", 'lt': "<"}[test]
    headline = 'ROC Curves %s: %s Events for %s %s Criterion' % (
        prefix, events_title, index_title, criterion)
    if randomized:
        headline = '[Randomized] ' + headline

    # Long sweeps are summarized so the title stays on one line
    if np.iterable(h) and len(h) > 10:
        leading = ', '.join(str(value) for value in h[:3])
        trailing = ', '.join(str(value) for value in h[-3:])
        window_text = '[%s, ..., %s]' % (leading, trailing)
    else:
        window_text = str(h)

    lines = [headline, 'Half-Window = %s' % window_text]
    if condn:
        lines.append('Scan-Cell Filter: "%s"' % condn)
    if phase != 'scan':
        lines.append('Scan Phase: %s' % phase.title())
    return '\n'.join(lines)
def compute_ranksum_statistics(self, randomize=False, condn=None):
    """Log Mann-Whitney U statistics of each scan index split by hit outcome.

    For every name in ScanIndex.ActiveNames and every half-window column
    found in the hit_outcome table, the scan-index values are split into
    the cells where the outcome is false and the cells where it is true,
    and the two samples are compared with st.mannwhitneyu. One line per
    comparison is written through self.out.

    Keyword arguments:
    randomize -- permute the scan-index values before testing; the log
        file is then named 'ranksum-randomized' instead of 'ranksum'
    condn -- filter condition handed to self._column_data

    The log file and the data file are closed even if a lookup or a
    statistic raises.
    """
    fn = 'ranksum-randomized' if randomize else 'ranksum'
    self.start_logfile(fn)
    try:
        data_file = self.get_data_file()
        scan_cell_table = data_file.root.scan_cell_info
        outcome_table = data_file.root.hit_outcome

        h_to_colname = self._find_half_windows(outcome_table)
        h_windows = sorted(h_to_colname.keys())

        for index_name in ScanIndex.ActiveNames:
            scan_index = self._column_data(
                scan_cell_table, index_name, scan_cell_table, condn)
            if randomize:
                scan_index = np.random.permutation(scan_index)

            for h in h_windows:
                # assumes the outcome column is boolean so it can be used
                # directly as a mask -- TODO confirm
                positives = self._column_data(
                    outcome_table, h_to_colname[h], scan_cell_table, condn)
                negatives = np.logical_not(positives)
                U, p = st.mannwhitneyu(
                    scan_index[negatives], scan_index[positives])
                self.out('%s index, half-window %d: U = %d, p < %f' % (
                    snake2title(index_name), h, U, p))

        # NOTE(review): whether the p-values are one-sided depends on the
        # default `alternative` of the installed SciPy's mannwhitneyu --
        # confirm before relying on this message.
        self.out('Note p-values are one-sided, multiply by 2 for two-sided.')
    finally:
        self.close_logfile()
        self.close_data_file()
def _dump_page(self):
    """Write the current page to a numbered PDF and clear for the next one.

    The page gets a super-title built from self.save_file_stem (plus the
    title-cased self.desc when set) and a small 'Page N' footer. The saved
    path is logged, appended to self.save_file_list, and self.pageno is
    incremented before the current figure is cleared.
    """
    heading = snake2title(self.save_file_stem)
    if self.desc:
        heading = heading + ' - %s' % self.desc.title()

    page_filename = '%s_%03d.pdf' % (self.save_file_stem, self.pageno)
    savefile = os.path.join(self.savedir, page_filename)

    self.f.suptitle(heading)
    footer = 'Page %d' % self.pageno
    self.f.text(0.5, 0.04, footer, size='small', ha='center')

    plt.savefig(savefile)
    self.out('Saved file:\n\t%s' % savefile)
    self.save_file_list.append(savefile)

    # Advance the counter and start the next page from a blank figure
    self.pageno += 1
    plt.clf()
def wrapper(*args, **kwargs):
    """Announce the wrapped callable, then delegate to it.

    The title-cased name of `f` is sent through `self.out` before `f` is
    called with the given arguments; its return value is passed back.
    Both `self` and `f` come from the enclosing scope.
    """
    banner = snake2title(f.__name__)
    self.out(banner)
    result = f(*args, **kwargs)
    return result
def process_data(self, bins=16):
    """Plot theta power and frequency as a function of binned velocity.

    For each entry of self.results['velocity_moments'] the value range
    returned by CI(..., alpha=0.02) is split into `bins` equal-width bins.
    Per rat and per bin, the median of the 'power' and 'frequency' columns
    of the theta_velocity table is computed. The resulting arrays are
    cached in self.datadir as P_v_<bins>.npy / f_v_<bins>.npy and reloaded
    on later calls. The across-rat mean with a 1.96 * SEM band is plotted
    for the first two moments, and friedman_str output is logged for each
    curve to stats_<bins>.log.

    Keyword arguments:
    bins -- number of velocity bins per moment

    The data file and the log file are closed even if processing fails.
    """
    log_path = os.path.join(self.datadir, 'stats_%d.log' % bins)
    self.out.outfd = open(log_path, 'w')
    self.out.timestamp = False

    try:
        data_file = self.get_data_file()
        results = data_file.root.theta_velocity
        rat_list = unique_rats(results)
        velocity_moments = self.results['velocity_moments']

        def find_velocity_bins():
            """Map each moment name to its bins + 1 bin edges."""
            vbins = {}
            for moment in velocity_moments:
                data = results.col(moment)
                lo, hi = CI(data, alpha=0.02)
                self.out('%s: range %f to %f' % (moment, lo, hi))
                vbins[moment] = np.linspace(lo, hi, bins + 1)
            return vbins

        velocity_bins = find_velocity_bins()

        def generate_velocity_modulation_curves():
            """Return (P_v, f_v) arrays indexed [moment, rat, bin]."""
            self.out('Generating velocity modulation curves...')
            P_v = np.empty(
                (len(velocity_moments), len(rat_list), bins), 'd')
            f_v = np.empty_like(P_v)
            power = results.col('power')
            frequency = results.col('frequency')
            for i, moment in enumerate(velocity_moments):
                self.out('Computing %s...' % moment)
                for j, rat in enumerate(rat_list):
                    self.out.printf('[%d] ' % rat, color='lightgreen')
                    for k in range(bins):
                        v_lo, v_hi = velocity_bins[moment][k:k + 2]
                        query = '(rat==%d)&(%s>=%f)&(%s<%f)' % (
                            rat, moment, v_lo, moment, v_hi)
                        ix = results.getWhereList(query)
                        P_v[i, j, k] = np.median(power[ix])
                        f_v[i, j, k] = np.median(frequency[ix])
                self.out.printf('\n')
            return P_v, f_v

        # NOTE(review): the cache is keyed on `bins` only; arrays saved
        # for a different moment list or rat set would be reloaded as-is.
        P_v_fn = os.path.join(self.datadir, 'P_v_%d.npy' % bins)
        f_v_fn = os.path.join(self.datadir, 'f_v_%d.npy' % bins)
        if os.path.exists(P_v_fn) and os.path.exists(f_v_fn):
            self.out('Loading previous velocity modulation data...')
            P_v, f_v = map(np.load, (P_v_fn, f_v_fn))
        else:
            P_v, f_v = generate_velocity_modulation_curves()
            np.save(P_v_fn, P_v)
            np.save(f_v_fn, f_v)

        plt.ioff()
        if not isinstance(self.figure, dict):
            self.figure = {}
        self.figure['velocity_modulation_curves'] = f = plt.figure(
            num=20, figsize=(9, 10))
        plt.clf()
        f.suptitle('Theta (Z-)Power, Frequency - Velocity Modulation')

        N_moments = len(velocity_moments)
        line_fmt = dict(ls='-', c='k', lw=2)
        shade_fmt = dict(ec='none', alpha=0.3, fill=True, fc='k', zorder=-1)

        def compute_error(M):
            """Half-width of the band: 1.96 * SEM across the first axis."""
            return 1.96 * M.std(axis=0) / np.sqrt(M.shape[0])

        def get_errlim(mu, err, factor=0.1):
            """Y-limits enclosing mu +/- err, padded by factor of the span."""
            # Uses the `err` argument; the previous version silently read
            # the enclosing `sem` variable and ignored this parameter.
            errmin, errmax = (mu - err).min(), (mu + err).max()
            derr = errmax - errmin
            pad = (factor / 2) * derr
            return errmin - pad, errmax + pad

        labels = ('Z-Power', 'Frequency')
        for i, moment in enumerate(velocity_moments[:2]):
            edges = velocity_bins[moment]
            centers = (edges[1:] + edges[:-1]) / 2
            for j, X_v in enumerate([P_v, f_v]):
                ax = f.add_subplot(N_moments, 2, i * 2 + j + 1)
                mu = X_v[i].mean(axis=0)
                sem = compute_error(X_v[i])
                ax.plot(centers, mu, **line_fmt)
                shaded_error(centers, mu, sem, ax=ax, **shade_fmt)
                self.out('%s: %s %s' % (
                    moment.title(), labels[j], friedman_str(X_v[i])))
                if i == 0:
                    quicktitle(ax, labels[j], size='x-small')
                ax.set_xlabel(snake2title(moment))
                ax.set_xlim(centers[0], centers[-1])
                ax.set_ylim(get_errlim(mu, sem))

        plt.ion()
        plt.show()
    finally:
        self.close_data_file()
        self.out.outfd.close()
def process_data(self):
    """Create figure plotting distributions of scan-firing measures and
    compute various relevant statistics

    self.results['LEC'] and self.results['MEC'] are read as record arrays
    with the same field names. For each field, smoothed densities of both
    samples are drawn in one subplot with dashed lines at the medians, and
    summary statistics are written to figure.log in self.datadir: median,
    mean +/- SEM, sample size, a two-sample t-test and a two-sample KS
    test. Fields whose name ends in 'norm' also get the t_one_tailed
    results, once with the default degrees of freedom and once with
    N / 5 - 1 (presumably a more conservative test -- confirm).

    Note that this changes the process working directory to self.datadir.
    The log file is closed even if plotting or a statistic fails.
    """
    from scanr.tools.stats import smooth_pdf, t_one_tailed
    from scanr.tools.string import snake2title
    from scipy.stats import ttest_ind as ttest, ks_2samp as kstest

    os.chdir(self.datadir)
    self.out.outfd = open('figure.log', 'w')
    self.out.timestamp = False

    try:
        self.figure = {}
        self.figure['distros'] = f = plt.figure(figsize=(10, 16))
        f.suptitle('Distributions of LEC/MEC Scan/Non-scan Firing')

        LEC = self.results['LEC']
        MEC = self.results['MEC']
        data_types = LEC.dtype.names
        N = len(data_types)

        # (log name, record array, line color), always processed LEC first
        regions = (('LEC', LEC, 'b'), ('MEC', MEC, 'g'))

        def mean_pm_sem(a):
            """Format the mean and standard error of the mean of a."""
            return '%.4f +/- %.4f' % (a.mean(), a.std() / np.sqrt(a.size))

        plt.ioff()
        line_kw = dict(lw=2, aa=True)
        for i, data in enumerate(data_types):
            ax = plt.subplot(N, 1, i + 1)
            label = snake2title(data)

            for name, table, color in regions:
                ax.plot(*smooth_pdf(table[data]),
                        label=name, c=color, **line_kw)

            # Leave 10% headroom above the tallest density curve
            ax.axis('tight')
            v = list(ax.axis())
            v[3] *= 1.1
            ax.axis(v)

            medians = {}
            for name, table, color in regions:
                medians[name] = np.median(table[data])
                ax.plot([medians[name]] * 2, [v[2], v[3]], color + '--')

            self.out(label.center(50, '-'))
            for name, table, color in regions:
                sample = table[data]
                n_samples = sample.size
                self.out('Median %s(%s) = %.4f' % (
                    name, label, medians[name]))
                self.out('Mean/SEM %s(%s) = %s' % (
                    name, label, mean_pm_sem(sample)))
                if data.endswith('norm'):
                    # Doubled %% leaves the numeric placeholders for the
                    # second formatting pass with the test results
                    t_fmt = '(%s > 1) = %%.4f, p = %%.8f' % name
                    self.out(('T' + t_fmt) % t_one_tailed(sample))
                    self.out(('T_cstv' + t_fmt) % t_one_tailed(
                        sample, df=n_samples / float(5) - 1))
                self.out('N %s cell-sessions = %d' % (name, n_samples))

            t = ttest(LEC[data], MEC[data])
            k = kstest(LEC[data], MEC[data])
            self.out('T-test(%s): t = %.4f, p = %.8f' % (label, t[0], t[1]))
            self.out('KS-test(%s): D = %.4f, p = %.8f' % (label, k[0], k[1]))

            # Flag tests significant at the 0.05 level on the axes
            if t[1] < 0.05:
                ax.text(0.025, 0.8, '*t', size='x-large',
                        transform=ax.transAxes)
            if k[1] < 0.05:
                ax.text(0.025, 0.6, '*KS', size='x-large',
                        transform=ax.transAxes)

            ax.set_ylabel('p[ %s ]' % label)
            if i == 0:
                ax.legend(loc=1)

        plt.ion()
        plt.show()
    finally:
        self.out.outfd.close()
def bootstrap_overall_fractions(self, first_day=False, shuffles=1000,
                                ymax=0.6):
    """Note: this is old processing code, before the proper within-rat
    analysis was implemented

    Computes the fraction of place_fields rows with events > 0 for four
    groupings (DR by maze type, DR by session number, novelty by maze
    type, novelty by session number), together with `shuffles` bootstrap
    resamples of each fraction. Row 0 of every result array is the
    observed fraction; rows 1..shuffles are resamples drawn with
    replacement. Each array is cached as an .npy file in self.datadir and
    reloaded on later calls. The fractions are then drawn as bar charts
    with empirical confidence intervals and as box plots.

    Keyword arguments:
    first_day -- restrict to day == 1 (otherwise day != 0); also selects
        the '_day1' file suffix and the 'First Day ' title label
    shuffles -- number of bootstrap resamples
    ymax -- upper y-limit of all four panels

    The data file and the log file are closed even if processing fails.
    """
    self.out.outfd = open(os.path.join(self.datadir, 'figure.log'), 'w')
    self.out.timestamp = False

    try:
        if first_day:
            daystr = '(day==1)&'
            suffix = '_day1'
            daylabel = 'First Day '
        else:
            daystr = '(day!=0)&'
            suffix = ''
            daylabel = ''

        data_file = self.get_data_file()
        try:
            field_data = data_file.root.place_fields
            self.out("Found %d rows of place field data." % field_data.nrows)

            def bootstrap_fractions(stem, description, groups):
                """Load the cached array for stem, or compute and save it.

                groups is a sequence of (log message, query) pairs, one
                per output column; daystr is prepended to every query.
                """
                array_path = os.path.join(
                    self.datadir, '%s%s.npy' % (stem, suffix))
                if os.path.exists(array_path):
                    self.out('Loading: %s' % description)
                    return np.load(array_path)

                self.out('Computing: %s' % description)
                fractions = np.zeros((shuffles + 1, len(groups)), 'd')
                for j, (message, query) in enumerate(groups):
                    self.out(message)
                    rows = field_data.getWhereList(daystr + query)
                    n_rows = len(rows)
                    if not n_rows:
                        # No matching fields: column stays at zero
                        continue
                    # Read every matching row once instead of once per
                    # resample
                    has_events = np.array(
                        [field_data[row]['events'] > 0 for row in rows],
                        dtype=bool)
                    # Row 0 is the original sample; the rest are draws
                    # with replacement (randint's upper bound is exclusive)
                    resamples = np.vstack((
                        np.arange(n_rows),
                        np.random.randint(
                            0, n_rows, size=(shuffles, n_rows))))
                    for i in range(shuffles + 1):
                        self.out.printf('.')
                        # float() avoids integer division on Python 2
                        fractions[i, j] = (
                            has_events[resamples[i]].sum() / float(n_rows))
                    self.out.printf('\n')
                np.save(array_path, fractions)
                return fractions

            type_query = '(session!=1)&(type=="%s")'
            F_DR_type = bootstrap_fractions(
                'F_DR_type', 'DR, across maze type',
                [('Type = %s' % mtype, type_query % mtype)
                 for mtype in ('STD', 'MIS')])
            F_DR_num = bootstrap_fractions(
                'F_DR_num', 'DR, across maze number',
                [('Session = %d' % number,
                  '(expt_type=="DR")&(session==%d)' % number)
                 for number in range(1, 6)])
            F_nov_type = bootstrap_fractions(
                'F_nov_type', 'Nov, across maze type',
                [('Type = %s' % mtype, type_query % mtype)
                 for mtype in ('FAM', 'NOV')])
            F_nov_num = bootstrap_fractions(
                'F_nov_num', 'Nov, across maze number',
                [('Session = %d' % number,
                  '(expt_type=="NOV")&(session==%d)' % number)
                 for number in range(1, 4)])
        finally:
            self.close_data_file()

        # Bar charts, maze number and maze type
        if not isinstance(self.figure, dict):
            self.figure = {}
        self.figure['field_stats%s' % suffix] = f = plt.figure(
            figsize=(10, 7))
        f.suptitle('Prevalence Fraction of %sEvents: %s' % (
            daylabel, snake2title(self.results['table_name'])))

        fmt = dict(width=0.3, linewidth=0, color='0.4', edgecolor='none',
                   ecolor='k', capsize=0)

        def error(F, alpha=0.05):
            """Return a (2, n_columns) yerr array from the resample rows."""
            e = np.empty((2, F.shape[1]), 'd')
            for i, vals in enumerate(F[1:].T):
                e[:, i] = CI(vals, alpha=alpha)  # empirical conf. interval
            e -= F[0]
            # errorbar yerr weirdness, mean - row1 to mean + row2
            e[0] *= -1  # must invert to get appropriate behavior!!!
            return e

        ax = f.add_subplot(221)
        ax.bar([0, 0.4, 1, 1.4], np.r_[F_DR_type[0], F_nov_type[0]],
               yerr=error(np.c_[F_DR_type, F_nov_type]), **fmt)
        ax.set_xlim(-0.3, 3.7)
        ax.set_ylim(0, ymax)
        ax.set(xticks=[], xticklabels=[])
        ax.tick_params(right=False, direction='out')
        ax.axhline(1.0, c='k', ls='--', lw=1)

        ax = f.add_subplot(222)
        ax.boxplot([
            F_DR_type[:, 0], F_DR_type[:, 1],
            F_nov_type[:, 0], F_nov_type[:, 1]
        ])
        ax.set_ylim(0, ymax)

        ax = f.add_subplot(223)
        ax.bar([0, 0.4, 0.8, 1.2, 1.6, 2.4, 2.8, 3.2],
               np.r_[F_DR_num[0], F_nov_num[0]],
               yerr=error(np.c_[F_DR_num, F_nov_num]), **fmt)
        ax.set_xlim(-0.3, 5.1)
        ax.set_ylim(0, ymax)
        ax.set(xticks=[], xticklabels=[])
        ax.tick_params(right=False, direction='out')
        ax.axhline(1.0, c='k', ls='--', lw=1)

        ax = f.add_subplot(224)
        ax.boxplot([
            F_DR_num[:, 0], F_DR_num[:, 1], F_DR_num[:, 2],
            F_DR_num[:, 3], F_DR_num[:, 4],
            F_nov_num[:, 0], F_nov_num[:, 1], F_nov_num[:, 2]
        ])
        ax.set_ylim(0, ymax)
    finally:
        self.out.outfd.close()
def plot_roc_curves(self, ylim=(0.5, 1.0), skinny_sweep_ax=False):
    """Plot stored ROC curves and their AUC across half-windows.

    For every name in ScanIndex.ActiveNames a figure is created with the
    ROC curves of all half-windows (left) and the AUC as a function of
    half-window (lower right). The (FPR, TPR) arrays are read from the
    roc_curves group of the data file, which is opened in append mode
    because the computed AUC array is written back under /roc_curves via
    create_array. Each AUC value is also logged to the 'roc_auc' log file.

    Keyword arguments:
    ylim -- y-limits of the AUC axes
    skinny_sweep_ax -- place the AUC axes in a narrow (2, 5, 10) subplot
        slot instead of the (2, 2, 4) slot

    The log file and the data file are closed even if plotting fails.
    """
    self.start_logfile('roc_auc')
    try:
        data_file = self.get_data_file(mode='a')
        roc_group = data_file.root.roc_curves
        outcome_table = data_file.root.hit_outcome
        scan_phase = data_file.root.scan_cell_info._v_attrs['scan_phase']

        roc_attrs = roc_group._v_attrs
        scan_cell_condn = roc_attrs['condn']
        is_randomized = roc_attrs['randomize']
        test = roc_attrs['test_operator']
        event_table = roc_attrs['event_table']

        h_to_colname = self._find_half_windows(outcome_table)
        h_windows = sorted(h_to_colname.keys())
        n_windows = len(h_windows)

        # Label only the first and last tick. A single half-window gets
        # exactly one label; the unguarded expression would produce two
        # labels for one tick.
        if n_windows > 1:
            tick_labels = (
                [h_windows[0]] + [''] * (n_windows - 2) + [h_windows[-1]])
        else:
            tick_labels = list(h_windows)

        self.close_figures()
        plt.ioff()
        if not isinstance(self.figure, dict):
            self.figure = {}

        fmt = dict(lw=2, ls='-')
        ndfmt = dict(c='0.4', ls='--', lw=1.5, zorder=-1)
        colors = plt.cm.coolwarm(np.linspace(0, 1, n_windows))

        for index_name in ScanIndex.ActiveNames:
            title = self._make_figure_title(
                index_name, h_windows, scan_cell_condn, scan_phase,
                event_table, test, randomized=is_randomized)
            f = self.new_figure(
                '%s_ROC_curves' % index_name, title, figsize=(11, 8))

            ax_roc = f.add_subplot(121)
            if skinny_sweep_ax:
                ax_auc = f.add_subplot(2, 5, 10)
            else:
                ax_auc = f.add_subplot(224)

            AUC = np.empty(n_windows, 'd')
            # Diagonal reference line of a non-discriminating classifier
            ax_roc.plot([0, 1], [0, 1], **ndfmt)
            for i, h in enumerate(h_windows):
                FPR, TPR = data_file.getNode(
                    roc_group, self._roc_array_name(index_name, h)).read()
                AUC[i] = self._compute_AUC(TPR, FPR, test)
                # Earlier (smaller) windows are drawn on top
                ax_roc.plot(FPR, TPR, label=str(h), c=colors[i],
                            zorder=n_windows - i, **fmt)
                self.out('%s index, window %d: AUC = %f' % (
                    snake2title(index_name), h, AUC[i]))

            arr = create_array(
                data_file, '/roc_curves', self._auc_array_name(index_name),
                AUC,
                title='ROC-AUC Across Half-Windows for %s Index' % index_name)
            self.out('Saved %s.' % arr._v_pathname)

            # A legend is only readable for a small number of curves
            if n_windows < 11:
                ax_roc.legend(loc='lower right')
            ax_roc.set(xlim=(0, 1), ylim=(0, 1))
            ax_roc.set_ylabel('TPR')
            ax_roc.set_xlabel('FPR')
            ax_roc.axis('scaled')

            # Chance-level reference line
            ax_auc.axhline(0.5, **ndfmt)
            ax_auc.plot(h_windows, AUC, '-', c='steelblue', lw=3,
                        solid_capstyle='round')
            ax_auc.set(xlabel='Half-Window, degrees', ylabel="AUC",
                       xlim=(0, h_windows[-1] + h_windows[0]), ylim=ylim,
                       xticks=h_windows, xticklabels=tick_labels)

        plt.ion()
        plt.show()
    finally:
        self.close_logfile()
        self.close_data_file()