def test_hdf(tempdir):
    """Round-trip a TimeSeries through HDF5 and verify file layout and contents."""
    dims = ('time', 'x', 'y', 'z')
    coords = {label: np.linspace(0, 1, 10) for label in dims}
    data = np.random.random((10, 10, 10, 10))
    rate = 1

    def verify_roundtrip(loaded, reference):
        # Data, coordinates, dimension order, and name must survive the trip.
        assert np.all(loaded.data == data)
        for coord in loaded.coords:
            assert (loaded.coords[coord] == reference.coords[coord]).all()
        for n, dim in enumerate(dims):
            assert loaded.dims[n] == dim
        assert loaded.name == "test"

    ts = TimeSeries.create(data, rate, coords=coords, dims=dims, name="test")
    filename = osp.join(tempdir, "timeseries.h5")
    ts.to_hdf(filename)

    # The on-disk layout exposes data/dims/coords entries plus metadata attrs.
    with h5py.File(filename, 'r') as hfile:
        for entry in ("data", "dims", "coords"):
            assert entry in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs

    verify_roundtrip(TimeSeries.from_hdf(filename), ts)

    # A user-supplied attrs dict should also survive saving and loading.
    ts_with_attrs = TimeSeries.create(data, rate, coords=coords, dims=dims,
                                      name="test", attrs=dict(a=1, b=[1, 2]))
    ts_with_attrs.to_hdf(filename)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    verify_roundtrip(loaded, ts)

    # test compression:
    ts_with_attrs.to_hdf(filename, compression='gzip', compression_opts=9)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    verify_roundtrip(loaded, ts)
def test_load_hdf_base64():
    """Test that we can still load the base64-encoded HDF5 format."""
    filename = osp.join(osp.dirname(__file__), "data", "R1111M_base64.h5")
    ts = TimeSeries.from_hdf(filename)
    assert "event" in ts.coords
    # BUG FIX: the closing paren was misplaced (``len(ts.coords["event"] == 10)``),
    # which only asserted that the elementwise comparison array was non-empty
    # rather than checking that the coordinate actually has 10 entries.
    assert len(ts.coords["event"]) == 10
def test_hdf(self, tmpdir):
    """EEG loaded via the reader and converted to PTSA should survive HDF5."""
    from cmlreaders.readers.readers import EventReader
    from unittest.mock import patch

    events_file = osp.join(osp.dirname(__file__), "data",
                           "R1111M_FR1_0_events.json")
    out_path = str(tmpdir.join("test.h5"))

    all_events = EventReader.fromfile(events_file, subject="R1111M",
                                      experiment="FR1")
    sampled = all_events[all_events.eegoffset > 0].sample(n=5)

    rel_start, rel_stop = 0, 10
    fake_eeg = partial(self.make_eeg, sampled, rel_start, rel_stop)

    # Patch out the real EEG load so the test stays self-contained.
    reader = self.reader
    with patch.object(reader, "load_eeg", return_value=fake_eeg()):
        eeg = reader.load_eeg(events=sampled, rel_start=0, rel_stop=10)

    original = eeg.to_ptsa()
    original.to_hdf(out_path)
    reloaded = TimeSeries.from_hdf(out_path)
    assert_timeseries_equal(original, reloaded)
def test_hdf_rhino(self, tmpdir):
    """Round-trip rhino-loaded EEG through HDF5 and compare time series."""
    from cmlreaders.warnings import MultiplePathsFoundWarning

    out_path = str(tmpdir.join("test.h5"))

    with warnings.catch_warnings():
        # Multiple candidate paths on rhino are expected; silence the warning.
        warnings.simplefilter("ignore", MultiplePathsFoundWarning)
        all_events = self.reader.load("events")
        sampled = all_events[all_events.eegoffset > 0].sample(n=5)
        eeg = self.reader.load_eeg(events=sampled, rel_start=0, rel_stop=10)
        original = eeg.to_ptsa()
        original.to_hdf(out_path)
        reloaded = TimeSeries.from_hdf(out_path)
        assert_timeseries_equal(original, reloaded)
def test_hdf(tempdir):
    """Test saving/loading with HDF5."""
    data = np.random.random((10, 10, 10, 10))
    dims = ('time', 'x', 'y', 'z')
    coords = {label: np.linspace(0, 1, 10) for label in dims}
    rate = 1
    ts = TimeSeries.create(data, rate, coords=coords, dims=dims, name="test")
    filename = osp.join(tempdir, "timeseries.h5")
    ts.to_hdf(filename)

    with h5py.File(filename, 'r') as hfile:
        assert "data" in hfile
        assert "dims" in hfile
        assert "coords" in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs

    loaded = TimeSeries.from_hdf(filename)
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # attrs supplied at creation time should survive the round trip
    ts_with_attrs = TimeSeries.create(data, rate, coords=coords, dims=dims,
                                      name="test", attrs=dict(a=1, b=[1, 2]))
    ts_with_attrs.to_hdf(filename)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # test compression:
    ts_with_attrs.to_hdf(filename, compression='gzip', compression_opts=9)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # test different containers as dims:
    data = np.random.random((3, 7, 10, 4))
    dims = ('time', 'recarray', 'list', 'recordarray')
    # BUG FIX: ``np.float`` and ``np.int`` were deprecated aliases of the
    # builtins (removed in NumPy 1.24); use ``float``/``int`` directly.
    coords = {
        'time': np.linspace(0, 1, 3),
        'recarray': np.array(
            [(i, j, k) for i, j, k in zip(np.linspace(0, 1, 7),
                                          np.linspace(1000, 2000, 7),
                                          np.linspace(0, 1, 7))],
            dtype=[('field1', float), ('field2', int), ('field3', 'U20')]),
        'list': list(np.linspace(100, 200, 10)),
        'recordarray': np.array(
            [(i, j, k) for i, j, k in zip(np.linspace(0, 1, 4),
                                          np.linspace(1000, 2000, 4),
                                          np.linspace(0, 1, 4))],
            dtype=[('field1', float), ('field2', int),
                   ('field3', 'U20')]).view(np.recarray)
    }
    rate = 1
    ts = TimeSeries.create(data, rate, coords=coords, dims=dims,
                           name="container test")
    ts.to_hdf(filename, compression='gzip', compression_opts=9)

    with h5py.File(filename, 'r') as hfile:
        assert "data" in hfile
        assert "dims" in hfile
        assert "coords" in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs

    loaded = TimeSeries.from_hdf(filename)
    for key in ts.attrs:
        assert ts.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        # dtypes can be slightly different for recarrays:
        assert (np.array(loaded.coords[coord],
                         ts[coord].values.dtype) == ts.coords[coord]).all()
    for coord in ts.coords:
        # dtypes can be slightly different for recarrays:
        assert (np.array(loaded.coords[coord],
                         ts[coord].values.dtype) == ts.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "container test"
def calc_subj_pep(subj, elecs=None, method='bip', relstart=300, relstop=1301,
                  freq_specs=(2, 120, 30), percentthresh=.95,
                  numcyclesthresh=3, load_eeg=False, save_eeg=False,
                  save_result=False, plot=False, kind='r1', experiment='FR1',
                  eeg_path='~/', result_path='~/'):
    """
    Inputs:
        subj - subject string
        elecs - list of electrode pairs (strings)
        method - bip or avg depending on referencing scheme
        freq_specs - tuple of (low_freq, high_freq, num_freqs) for background
            fitting in BOSC.

    Returns:
        pep_all - average Pepisode for all words at each frequency
        pep_rec - average Pepisode for recalled words at each frequency
        pep_nrec - average Pepisode for non-recalled words at each frequency
        subj_tscores - t-score at each frequency, comparing rec and nrec
            across events

    ** Note that tscore is not itself meaningful because events are not
    independent. Comparing these tscores across subjects, however, is valid.
    """
    if save_eeg and load_eeg:
        # BUG FIX: previously ``raise ('...')`` raised a bare string, which is
        # itself a TypeError in Python 3; raise a proper exception instead.
        raise ValueError('Cannot save and load eeg simultaneously.')
    print('Subject: ', subj)

    if elecs is None:
        # Fall back to the precomputed list of good hippocampal pairs.
        good_subj = pd.read_pickle(
            '/home1/jrudoler/Theta_Project/hippo_subject_pairs.csv')
        elecs = good_subj[good_subj['Subject'] == subj]['hippo_pairs'].iloc[0]

    subj_pepisode = None
    subj_recalled = None
    subj_tscores = None
    if plot:
        plt.figure(figsize=(12, 6))
    lowfreq, highfreq, numfreqs = freq_specs
    print(elecs)

    for pair_str in elecs:
        chans = pair_str.split('-')
        data = cml.get_data_index(kind=kind)
        data = data[data['experiment'] == experiment]
        sessions = data[data['subject'] == subj]['session'].unique()
        pepisodes = None  # events, freqs
        recalled = None  # events, freqs
        tscore = None
        for sess in sessions:
            try:
                print('Loading session {} EEG'.format(sess))
                reader = cml.CMLReader(subject=subj, experiment=experiment,
                                       session=sess)
                all_events = reader.load('task_events')
                if not os.path.exists(eeg_path):
                    os.makedirs(eeg_path)
                if load_eeg:
                    # Reuse previously saved per-session EEG.
                    eeg = TimeSeries.from_hdf(
                        eeg_path + 'session_' + str(sess) + '_' + pair_str)
                    bosc = P_episode(all_events, eeg,
                                     sr=eeg.samplerate.values,
                                     lowfreq=lowfreq, highfreq=highfreq,
                                     numfreqs=numfreqs)
                elif method == 'bip':
                    pairs = reader.load("pairs")
                    # bipolar eeg
                    bip = reader.load_eeg(
                        scheme=pairs[pairs.label == pair_str])\
                        .to_ptsa().mean(['event', 'channel'])
                    # Notch out 60 Hz line noise before BOSC.
                    bip = ButterworthFilter(bip, freq_range=[58., 62.],
                                            filt_type='stop',
                                            order=4).filter()
                    print("Applying BOSC method!")
                    if save_eeg:
                        bip.to_hdf(eeg_path + 'session_' + str(sess) + '_'
                                   + pair_str)
                    bosc = P_episode(all_events, bip,
                                     sr=bip.samplerate.values,
                                     lowfreq=lowfreq, highfreq=highfreq,
                                     numfreqs=numfreqs)
                elif method == 'avg':
                    contacts = reader.load("contacts")
                    # average eeg
                    eeg = reader.load_eeg(
                        scheme=contacts).to_ptsa().mean('event')
                    # all zeros from a broken lead leads to -inf power,
                    # which results in a LinAlg error for log-log fit
                    # TODO: verify this channel exclusion doesn't cause any
                    # problems. Maybe print a message or raise an error?
                    bad_chan_mask = ~np.all(eeg.values == 0, axis=1)
                    contacts = contacts[bad_chan_mask]
                    eeg = eeg[bad_chan_mask, :]
                    avg = (eeg[contacts.label.str.contains(chans[0]) |
                               contacts.label.str.contains(chans[1]), :]
                           - eeg.mean('channel')).mean('channel')
                    avg = ButterworthFilter(avg, freq_range=[58., 62.],
                                            filt_type='stop',
                                            order=4).filter()
                    if save_eeg:
                        avg.to_hdf(eeg_path + '/session_' + str(sess) + '_'
                                   + pair_str)
                    bosc = P_episode(all_events, avg,
                                     sr=avg.samplerate.values,
                                     lowfreq=lowfreq, highfreq=highfreq,
                                     numfreqs=numfreqs)
                if plot:
                    bosc.background_fit(plot_type='session')
                if pepisodes is None:
                    pepisodes = bosc.Pepisode
                    # be careful to only use events from lists that have eeg
                    # data. [np.isin(bosc.interest_events.list, self.lists)]
                    recalled = bosc.interest_events.recalled.values
                    tscore, _ = scp.ttest_ind(pepisodes[recalled],
                                              pepisodes[~recalled], axis=0)
                elif np.isnan(tscore).all():
                    # First session produced all-nan t-scores; recompute.
                    tscore, _ = scp.ttest_ind(pepisodes[recalled],
                                              pepisodes[~recalled], axis=0)
                else:
                    pepisodes = np.vstack([pepisodes, bosc.Pepisode])
                    recalled = np.hstack(
                        [recalled, bosc.interest_events.recalled.values])
                    t, _ = scp.ttest_ind(pepisodes[recalled],
                                         pepisodes[~recalled], axis=0)
                    tscore = np.vstack([tscore, t])
                print("Proportion recalled:", recalled.mean())
            except IndexError:
                print('IndexError for subject {} session {}'.format(
                    subj, sess))
            except FileNotFoundError:
                print('FileNotFoundError for {} session {}'.format(subj,
                                                                   sess))
                continue
        if pepisodes is None:
            raise Exception('No working sessions')
        # Accumulate per-pair results across electrode pairs.
        subj_pepisode = pepisodes if subj_pepisode is None else np.dstack(
            [subj_pepisode, pepisodes])
        subj_recalled = recalled if subj_recalled is None else np.vstack(
            [subj_recalled, recalled])
        subj_tscores = tscore if subj_tscores is None else np.vstack(
            [subj_tscores, tscore])

    if np.isnan(subj_tscores).all():
        raise Exception('Too many nan in T-scores. This problem can arise '
                        'when there are no recalled events.')
    if subj_pepisode.ndim > 2:
        # if multiple electrode pairs, average over pairs
        print("Averaging over {} electrodes for subject {}".format(
            subj_pepisode.shape[2], subj))
        subj_pepisode = subj_pepisode.mean(2)
        subj_recalled = subj_recalled.mean(0)
        subj_recalled = subj_recalled.astype(bool)
    if subj_tscores.ndim > 1:
        print(len(sessions), 'sessions')
        subj_tscores = np.nanmean(subj_tscores, axis=0)
    print('{} total events: {} recalled '
          'and {} non-recalled'.format(len(subj_recalled),
                                       sum(subj_recalled),
                                       sum(~subj_recalled)))

    pep_rec = subj_pepisode[subj_recalled, :].mean(0)
    pep_nrec = subj_pepisode[~subj_recalled, :].mean(0)
    pep_all = subj_pepisode.mean(0)

    if save_result:
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        np.save(result_path + '{}_all_{}'.format(subj, method), pep_all)
        np.save(result_path + '{}_rec_{}'.format(subj, method), pep_rec)
        np.save(result_path + '{}_nrec_{}'.format(subj, method), pep_nrec)
        np.save(result_path + '{}_tscore_{}'.format(subj, method),
                subj_tscores)
    return pep_all, pep_rec, pep_nrec, subj_tscores