def test_get_eeg(which, subject, experiment, session, shape, rhino_root):
    """Check that ``tmi.get_eeg`` returns data with the expected shape.

    Only events whose type is ``STIM_ON`` are handed to ``tmi.get_eeg``.
    """
    reader = CMLReader(subject, experiment, session, rootdir=rhino_root)
    session_events = reader.load("events")
    is_stim_on = session_events.type == "STIM_ON"
    stim_on_events = session_events[is_stim_on]

    eeg = tmi.get_eeg(which, reader, stim_on_events)
    assert eeg.shape == shape
def test_get_stim_channels(rhino_root):
    """Check the stim channels reported for R1111M, FR2, session 0."""
    reader = CMLReader("R1111M", "FR2", 0, rootdir=rhino_root)
    pairs = reader.load("pairs")
    stim_events = tmi.get_stim_events(reader)

    stim_channels = tmi.get_stim_channels(pairs, stim_events)

    # Exactly one stim channel is expected, with value 140.
    assert len(stim_channels) == 1
    assert stim_channels == [140]
def test_load_multisession(self, subjects, experiments, rhino_root):
    """Load EEG for WORD events sampled across subjects and experiments.

    A random sample of 20 ``WORD`` events covering every requested subject
    and experiment is drawn. When more than one subject is requested,
    loading EEG for the sample must raise ``ValueError``; otherwise the
    loaded EEG must contain one epoch and one event per sampled event.
    """
    all_events = CMLReader.load_events(subjects, experiments,
                                       rootdir=rhino_root)
    word_events = all_events[all_events["type"] == "WORD"]

    # Resample from the full pool of WORD events on every attempt. Drawing
    # from the previous 20-row sample would only reshuffle the same rows, so
    # a first draw that misses a subject/experiment could never be replaced.
    while True:
        events = word_events.sample(20)
        sampled_subjects = set(events["subject"])
        sampled_experiments = set(events["experiment"])
        if (all(s in sampled_subjects for s in subjects)
                and all(e in sampled_experiments for e in experiments)):
            break

    reader = CMLReader(events["subject"].unique()[0], rootdir=rhino_root)

    def load():
        return reader.load_eeg(events, rel_start=0, rel_stop=10)

    if len(subjects) > 1:
        with pytest.raises(ValueError):
            load()
        return

    eeg = load()

    assert len(eeg.epochs) == len(events)
    assert len(eeg.events) == len(events)

    for subject in subjects:
        assert subject in set(events["subject"])

    for experiment in experiments:
        assert experiment in set(events["experiment"])
def test_rereference(self, subject, reref_possible, index, channel,
                     rhino_root):
    """Check EEG loading with and without a rereferencing scheme.

    When rereferencing is possible, the channel count changes from 100 to
    141 once the ``pairs`` scheme is applied. Otherwise the data loaded
    with and without the scheme must be equal.
    """
    reader = CMLReader(subject=subject, experiment='FR1', session=0,
                       rootdir=rhino_root)
    rate = reader.load("sources")["sample_rate"]

    all_events = reader.load("events")
    first_word = all_events[all_events.type == "WORD"].iloc[:1]

    rel_start = 0
    rel_stop = 100
    expected_samples = int(rate * rel_stop / 1000)

    scheme = reader.load('pairs')
    load_eeg = partial(reader.load_eeg, events=first_word,
                       rel_start=rel_start, rel_stop=rel_stop)

    if not reref_possible:
        data_noreref = load_eeg()
        data_reref = load_eeg(scheme=scheme)
        assert_equal(data_noreref.data, data_reref.data)
        assert data_reref.channels[index] == channel
        return

    data = load_eeg()
    assert data.shape == (1, 100, expected_samples)

    data = load_eeg(scheme=scheme)
    assert data.shape == (1, 141, expected_samples)
    assert data.channels[index] == channel
def test_load(self, file_type):
    """Smoke test: load the data type named by the stem of ``file_type``.

    ``CMLReader`` is patched with ``patched_cmlreader`` for the duration
    of the load.
    """
    with patched_cmlreader(datafile(file_type)):
        data_type, _extension = os.path.splitext(file_type)
        reader = CMLReader(subject="R1405E", localization=0,
                           experiment="FR5", session=1)
        reader.load(data_type=data_type)
def test_read_whole_session(self, subject, rhino_root):
    """Load EEG without events and check the resulting shape."""
    reader = CMLReader(subject=subject, experiment="FR1", session=0,
                       rootdir=rhino_root)
    whole_session = reader.load_eeg()

    expected_shape = (1, 70, 3304786)
    assert whole_session.shape == expected_shape
def test_load_unimplemented(self):
    """Loading an unknown data type must raise ``NotImplementedError``."""
    reader_kwargs = dict(subject='R1405E', localization=0, experiment='FR1',
                         session=0, montage=0)

    with patched_cmlreader():
        reader = CMLReader(**reader_kwargs)
        with pytest.raises(NotImplementedError):
            reader.load("fake_data")
def test_get_reader(self, file_type):
    """``get_reader`` must return an instance of the registered class."""
    reader_kwargs = dict(subject='R1405E', localization=0, experiment='FR1',
                         session=0, montage=0)

    with patched_cmlreader():
        reader = CMLReader(**reader_kwargs)
        reader_obj = reader.get_reader(file_type)
        expected_class = reader.readers[file_type]
        assert type(reader_obj) == expected_class
def test_read_categories_rhino(self, kind, read_categories, rhino_root):
    """Load ``kind`` for R1111M with or without electrode categories.

    The electrode categories are loaded separately only when
    ``read_categories`` is truthy; otherwise ``None`` is passed to the
    checker.
    """
    reader = CMLReader("R1111M", "FR1", 0, 0, 0, rootdir=rhino_root)
    df = reader.load(kind, read_categories=read_categories)

    categories = (
        reader.load("electrode_categories") if read_categories else None
    )

    self.assert_categories_correct(df, categories, read_categories)
def test_load_events(self, subjects, experiments, unique_sessions,
                     rhino_root):
    """Check how many distinct sessions ``CMLReader.load_events`` returns.

    When both ``subjects`` and ``experiments`` are ``None`` the call must
    raise ``ValueError``.
    """
    nothing_requested = subjects is None and experiments is None

    if nothing_requested:
        with pytest.raises(ValueError):
            CMLReader.load_events(subjects, experiments, rootdir=rhino_root)
        return

    events = CMLReader.load_events(subjects, experiments, rootdir=rhino_root)

    # One group per distinct (subject, experiment, session) combination.
    per_session = events.groupby(["subject", "experiment", "session"]).size()
    assert len(per_session) == unique_sessions
def test_negative_offsets(self, rhino_root):
    """Load EEG for a window defined by two negative relative offsets."""
    subject = "R1298E"
    experiment = "FR1"
    reader = CMLReader(subject=subject, experiment=experiment, session=0,
                       rootdir=rhino_root)

    all_events = reader.load("events")
    word_events = all_events[all_events["type"] == "WORD"].iloc[:2]

    eeg = reader.load_eeg(events=word_events, rel_start=-100, rel_stop=-20)

    n_samples = eeg.shape[-1]
    assert n_samples == 80
def test_eeg_reader(self, subject, index, channel, rhino_root):
    """Load EEG for the first two WORD events and check its layout."""
    reader = CMLReader(subject=subject, experiment='FR1', session=0,
                       rootdir=rhino_root)

    all_events = reader.load("events")
    is_word = all_events["type"] == "WORD"
    two_words = all_events[is_word].iloc[:2]

    eeg = reader.load_eeg(events=two_words, rel_start=0, rel_stop=100)

    assert len(eeg.time) == 100
    assert eeg.data.shape[0] == 2
    assert eeg.channels[index] == channel
def test_read_eeg(subject, rhino_root):
    """Read resting-state EEG derived from countdown events."""
    reader = CMLReader(subject, 'FR1', session=0, rootdir=rhino_root)
    samplerate = reader.load('sources')['sample_rate']

    countdown_events = get_countdown_events(reader)
    resting_events = countdown_to_resting(countdown_events, samplerate)
    eeg = read_eeg_data(reader, resting_events, reref=False)

    # R1387E FR1 session 0 had 13 countdown start events and we get 3 epochs
    # per countdown
    n_countdowns = 13
    epochs_per_countdown = 3
    expected_events = n_countdowns * epochs_per_countdown

    assert eeg.shape == (expected_events, 121, 1000)
def test_read_categories_missing(self, kind, rhino_root):
    """Try reading with electrode category info when that can't be found.

    The load is expected to raise ``exc.MissingDataError``.
    """
    reader = CMLReader("R1132C", "TH1", 0, 0, 0, rootdir=rhino_root)

    with pytest.raises(exc.MissingDataError):
        reader.load(kind, read_categories=True)
def test_get_distances():
    """Compare ``tmi.get_distances`` against a stored reference matrix."""
    data_pkg = "thetamod.test.data"

    pairs_path = resource_filename(data_pkg, "R1260D_pairs.json")
    reader = CMLReader("R1260D")
    pairs = reader.load("pairs", file_path=pairs_path)
    pairs = pairs.sort_values(by=['contact_1', 'contact_2'])

    reference_path = resource_filename(data_pkg, "R1260D_distmat.npy")
    ref_result = np.load(reference_path)

    distmat = tmi.get_distances(pairs)

    assert_almost_equal(distmat, ref_result)
def load_eeg(self, subject: str, experiment: str) -> TimeSeries:
    """Load EEG around every ``WORD`` event of an experiment.

    Events are loaded with ``CMLReader.load_events`` for the given subject
    and experiment, filtered to ``WORD`` events, and EEG is read from 0 to
    1600 relative to each event.

    :param subject: subject ID
    :param experiment: experiment name
    :return: the EEG returned by ``CMLReader.load_eeg``

    """
    logger.info("Loading EEG for %s/%s", subject, experiment)

    all_events = CMLReader.load_events(subject, experiment)
    word_events = all_events[all_events.type == "WORD"]

    reader = CMLReader(subject, experiment)
    return reader.load_eeg(events=word_events, rel_start=0, rel_stop=1600)
def test_invalidate_eeg(rhino_root):
    """Smoke test for ``thetamod.artifact.invalidate_eeg``."""
    reader = CMLReader(subject='R1286J', experiment='catFR3', session=0,
                       rootdir=rhino_root)

    # The result is not used below; the call is kept so that loading pairs
    # is still exercised.
    reader.load("pairs")

    stim_events = get_stim_events(reader)
    pre_eeg = get_eeg("pre", reader, stim_events)
    post_eeg = get_eeg("post", reader, stim_events)

    thetamod.artifact.invalidate_eeg(reader, pre_eeg, post_eeg, rhino_root)
def __init__(self, subject, experiment, session, outdir, rootdir=None):
    """Set up paths, a reader and the EEG file listing for one session.

    Sets ``rootdir``, ``outdir`` (``outdir`` joined onto the resolved root),
    ``reader``, ``sources`` (the parsed sources JSON) and ``eeg_files``
    (for each sources entry, the sorted files in the sibling ``noreref``
    directory whose names start with that entry's ``name``).
    """
    self.rootdir = get_root_dir(rootdir)
    self.outdir = Path(self.rootdir).joinpath(outdir)
    self.reader = CMLReader(subject, experiment, session, rootdir=rootdir)

    sources_filename = self.reader.path_finder.find("sources")
    with open(sources_filename, "r") as infile:
        self.sources = json.load(infile)

    noreref_dir = Path(sources_filename).parent.joinpath("noreref")
    self.eeg_files = []
    for info in self.sources.values():
        matches = noreref_dir.glob(info["name"] + "*")
        self.eeg_files.append(sorted(matches))
def test_load_from_rhino(self, subject, experiment, session, localization,
                         file_type, rhino_root):
    """Load ``file_type`` from rhino, switching sessions where needed.

    For LTP subjects, a file type whose protocols do not include ``"ltp"``
    must raise ``exc.UnsupportedProtocolError``. Some file types are always
    loaded from a fixed subject/experiment/session instead of the
    parametrized one.
    """
    if subject.startswith("LTP"):
        ltp_reader = CMLReader(subject=subject, experiment=experiment,
                               session=session, localization=localization,
                               rootdir=rhino_root)
        if "ltp" not in ltp_reader.reader_protocols[file_type]:
            with pytest.raises(exc.UnsupportedProtocolError):
                ltp_reader.load(file_type)
            return

    fr2_file_types = ["electrode_categories", "classifier_summary",
                      "math_summary", "session_summary",
                      "baseline_classifier"]
    if file_type in fr2_file_types:
        subject, experiment, session = "R1111M", "FR2", 0

    if file_type in ["used_classifier"]:
        subject, experiment = 'R1409D', 'FR6'
        session, localization = 0, 0

    wants_montage = file_type in ["contacts", "pairs"]
    if subject.startswith("LTP") and wants_montage:
        pytest.xfail("unsure if montage data exists for LTP")

    reader = CMLReader(subject=subject, localization=localization,
                       experiment=experiment, session=session,
                       rootdir=rhino_root)
    reader.load(file_type)
def test_resting_state_connectivity(rhino_root):
    """Compare resting-state connectivity for R1354E with stored results.

    Resting events and the connectivity matrix are checked against
    reference files. Intermediate results are also written to
    ``test_output.npz`` in the current working directory.
    """
    subject = "R1354E"

    index = get_data_index("r1", rhino_root)
    is_subject_fr1 = (index.subject == subject) & (index.experiment == 'FR1')
    sessions = index[is_subject_fr1].session.unique()

    all_events = []
    all_resting = []
    eeg_per_session = []

    for session in sessions:
        reader = CMLReader(subject, 'FR1', session, rootdir=rhino_root)
        events = get_countdown_events(reader)
        samplerate = reader.load('sources')['sample_rate']
        resting = countdown_to_resting(events, samplerate)

        all_events.append(events)
        all_resting.append(resting)
        eeg_per_session.append(read_eeg_data(reader, resting, reref=False))

    # Verify that events match Ethan's analysis; his events are ordered in an
    # odd way, so we have to sort them to make sure they match
    ethan_path = resource_filename("thetamod.test.data",
                                   "R1354E_events_ethan.npy")
    ethan = np.load(ethan_path)
    assert_equal(sorted(ethan["eegoffset"]),
                 sorted(pd.concat(all_resting).eegoffset.values))

    eegs = TimeSeries.concatenate(eeg_per_session)
    eegs.data = ButterworthFilter(time_series=eegs.to_ptsa()).filter().values

    conn = get_resting_state_connectivity(eegs.to_mne(), eegs.samplerate)

    basename = ('{subject}_baseline3trials_network_theta-alpha.npy'
                .format(subject=subject))
    filename = Path(rhino_root).joinpath('scratch', 'esolo', 'tmi_analysis',
                                         subject, basename)
    ethans_conn = np.load(filename)

    np.savez(
        "test_output.npz",
        eeg=eegs.data,
        my_conn=conn,
        ethans_conn=ethans_conn,
        events=pd.concat(all_events, ignore_index=True).to_records(),
        resting=pd.concat(all_resting, ignore_index=True).to_records(),
    )

    assert_almost_equal(scipy.special.logit(conn),
                        scipy.special.logit(ethans_conn), 3)
def test_determine_localization_or_montage(self, subject, experiment,
                                           session, localization, montage):
    """Check the montage/localization a reader ends up with.

    The reader is built from subject, experiment and session only.
    """
    with patched_cmlreader():
        reader = CMLReader(subject=subject, experiment=experiment,
                           session=session)

        found = (reader.montage, reader.localization)
        assert found[0] == montage
        assert found[1] == localization
def test_compute_psd(rhino_root):
    """Compare ``tmi.compute_psd`` with stored pre/post PSD values.

    EEG from sessions 0 and 2 of R1260D catFR3 is concatenated. Positions
    where the reference is NaN are excluded from the comparison. Results
    are also written to ``test_output.npz`` in the working directory.
    """
    reference_path = resource_filename("thetamod.test.data",
                                       "R1260D_catFR3_psd.npz")
    ethan = np.load(reference_path)

    readers = [
        CMLReader("R1260D", "catFR3", session, rootdir=rhino_root)
        for session in (0, 2)
    ]
    stim_events = [tmi.get_stim_events(reader) for reader in readers]

    def concatenated_eeg(which):
        # One EEG per (reader, events) pair, joined into a single series.
        pieces = [
            tmi.get_eeg(which, reader, events)
            for reader, events in zip(readers, stim_events)
        ]
        return TimeSeries.concatenate(pieces)

    pre_eegs = concatenated_eeg("pre")
    post_eegs = concatenated_eeg("post")

    pre_psd = tmi.compute_psd(pre_eegs)
    post_psd = tmi.compute_psd(post_eegs)

    np.savez("test_output.npz",
             pre_psd=pre_psd,
             post_psd=post_psd,
             ethan_pre_psd=ethan["pre"],
             ethan_post_psd=ethan["post"])

    valid_pre = ~np.isnan(ethan["pre"])
    valid_post = ~np.isnan(ethan["post"])

    assert_allclose(ethan["pre"][valid_pre], pre_psd[valid_pre])
    assert_allclose(ethan["post"][valid_post], post_psd[valid_post])
def get_cmlevents(subj, montage=None, session=None, exp='TH1'):
    """Return the reformatted events DataFrame for a subject and montage.

    The events do not include pathInfo, since that isn't recorded in the
    system used by cmlreaders. To get pathInfo you need to use
    `read_path_log`.

    Parameters
    ----------
    subj
        Subject code, matched against the ``subject`` column of the "r1"
        data index.
    montage
        Montage number. Defaults to the first montage listed for this
        subject and experiment.
    session
        Session number. Defaults to the first session listed for this
        subject and experiment.
    exp
        Experiment name (default: ``'TH1'``).

    Returns
    -------
    The events loaded through ``CMLReader`` with two added columns,
    ``original_session_ID`` and ``subject_alias``, and with ``SESS_START``
    events removed.
    """
    # ------ Load data index for RAM
    df = get_data_index("r1")

    # ------ Specify the df for this subject and exp
    this_df = df[(df['subject'] == subj) & (df['experiment'] == exp)]

    # ------ Find out the sessions, localization, and montage for this subject
    if session is None:  # default to first sess
        session = this_df['session'].iloc[0]
    if montage is None:  # default to first mont
        montage = this_df['montage'].iloc[0]

    # ------ Get more specific df
    this_specific_df = this_df[(this_df['session'] == session)
                               & (this_df['montage'] == montage)]
    loc = int(this_specific_df.iloc[0]['localization'])

    # ------ Subjs with a montage above 0 have aliases used in log files
    # use .iloc[0] because this_specific_df is expected to hold one row
    subject_alias = this_specific_df['subject_alias'].iloc[0]

    # ------ For some subjs the sess ID system changed over time, and we need
    # to know the original sess ID for certain log files access
    orig_sess_ID = this_specific_df['original_session'].iloc[0]
    if isinstance(orig_sess_ID, str):
        # Parse as float first so that a "nan" string can be represented.
        orig_sess_ID = np.float64(orig_sess_ID)
        # is_integer() is False for NaN, so int() is never called on NaN
        # (which would raise ValueError).
        if orig_sess_ID.is_integer():
            orig_sess_ID = int(orig_sess_ID)
    if np.isnan(orig_sess_ID):
        # No original session recorded: fall back to the current session.
        orig_sess_ID = session

    # ------ Use CMLReader to read the events structure
    reader = CMLReader(subj, exp, session=session, montage=montage,
                       localization=loc)
    events = reader.load('events')
    events['original_session_ID'] = orig_sess_ID
    events['subject_alias'] = subject_alias

    # remove the unhelpful and inconsistent SESS_START event
    events = events[events['type'] != 'SESS_START']

    return events
def is_rerefable(subject: str, experiment: str, session: int,
                 localization: int = 0, montage: int = 0,
                 rootdir: Optional[str] = None) -> bool:
    """Checks if a subject's EEG data can be arbitrarily rereferenced.

    Parameters
    ----------
    subject
        Subject ID.
    experiment
        Experiment.
    session
        Session number.
    localization
        Localization number (default: 0).
    montage
        Montage number (default: 0).
    rootdir
        Root data directory.

    Returns
    -------
    Whether or not the EEG data can be rereferenced.

    """
    from cmlreaders import CMLReader

    reader = CMLReader(subject, experiment, session, localization, montage,
                       rootdir=rootdir)
    sources = reader.load("sources")

    # Only recordings stored as eeg_timeseries.h5 can be non-rereferenceable.
    if sources["source_file"] != "eeg_timeseries.h5":
        return True

    noreref_dir = Path(sources["path"]).parent.joinpath("noreref")
    hdf5_files = list(noreref_dir.glob("*.h5"))

    # only one HDF5 is present which indicates we recorded in hardware
    # bipolar mode
    recorded_bipolar = len(hdf5_files) == 1
    return not recorded_bipolar
def test_channel_discrepancies(self, subject, experiment, session,
                               eeg_channels, pairs_channels, rhino_root):
    """Test loading of known subjects with differences between channels in
    pairs.json and channels actually recorded.

    Loading EEG with the pairs scheme must emit ``MissingChannelsWarning``.
    """
    reader = CMLReader(subject, experiment, session, rootdir=rhino_root)
    pairs = reader.load("pairs")
    events = reader.load("events")

    with pytest.warns(MissingChannelsWarning):
        one_event = events.sample(n=1)
        eeg = reader.load_eeg(one_event, rel_start=0, rel_stop=10,
                              scheme=pairs)

    assert len(eeg.channels) == eeg_channels
    assert len(pairs) == pairs_channels
def test_get_data_index(self, protocol):
    """``CMLReader.get_data_index`` must match ``get_data_index``.

    ``PathFinder.find`` is patched to return a bundled index file: the
    ``r1.json`` file for the ``"all"`` protocol, otherwise
    ``<protocol>.json``.
    """
    # Compare by value: ``is`` tests object identity, which is not
    # guaranteed to hold for two equal strings.
    if protocol == "all":
        path = resource_filename("cmlreaders.test.data", "r1.json")
    else:
        path = resource_filename("cmlreaders.test.data", protocol + ".json")

    with patch.object(PathFinder, "find", return_value=path):
        ix = CMLReader.get_data_index(protocol)
        expected = get_data_index(protocol)

    # ``all(ix == expected)`` would iterate over the column labels of the
    # comparison frame and pass regardless of the cell values; ``equals``
    # compares the contents and treats NaNs in the same position as equal.
    assert ix.equals(expected)
def reader(self):
    """Build a ``CMLReader`` for R1111M, FR1, session 0.

    The root directory comes from ``get_rhino_root``; if that raises
    ``OSError``, ``rootdir=None`` is passed instead.
    """
    from cmlreaders import CMLReader
    from ptsa.test.utils import get_rhino_root

    rootdir = None
    try:
        rootdir = get_rhino_root()
    except OSError:
        pass

    return CMLReader("R1111M", "FR1", 0, rootdir=rootdir)
def test_saturated_events(subject, experiment, session, kind, rhino_root):
    """The saturation mask must match the reference implementation's."""
    reader = CMLReader(subject, experiment, session=session,
                       rootdir=rhino_root)
    stim_events = get_stim_events(reader)
    eeg_data = get_eeg(kind, reader, stim_events).data

    reference_mask = ethan.pred_stim_pipeline.find_sat_events(eeg_data)
    candidate_mask = thetamod.artifact.get_saturated_events_mask(eeg_data)

    masks_agree = reference_mask == candidate_mask
    assert masks_agree.all()
def load_subj_events(task, subject, montage, as_df=True):
    """Return the events for one subject, task and montage.

    Parameters
    ----------
    task: str
        The experiment name (ex: RAM_TH1, RAM_FR1, ...). A ``RAM_`` prefix
        is stripped before the lookup.
    subject: str
        The subject code
    montage: int
        The montage number for the subject
    as_df: bool
        If true, the events will be returned as a pandas.DataFrame,
        otherwise as a numpy record array

    Returns
    -------
    pandas.DataFrame or numpy record array
        The events, in the form selected by ``as_df``
    """
    task = task.replace('RAM_', '')

    # if a RAM task, get info from r1 database and load as df using cmlreader
    if task in r1_data.experiment.unique():
        # get list of sessions for this subject, experiment, montage
        matches = ((r1_data['subject'] == subject)
                   & (r1_data['experiment'] == task)
                   & (r1_data['montage'] == int(montage)))
        sessions = r1_data[matches]['session'].unique()

        # load all and concat
        per_session = [
            CMLReader(subject=subject, experiment=task,
                      session=session).load('events')
            for session in sessions
        ]
        events = pd.concat(per_session)
        if as_df:
            return events
        return events.to_records(index=False)

    # otherwise load matlab files
    subj_file = subject + '_events.mat'
    if int(montage) != 0:
        subj_file = subject + '_' + str(montage) + '_events.mat'
    subj_ev_path = os.path.join('/data/events/', task, subj_file)

    events = loadmat(subj_ev_path, squeeze_me=True)['events']

    # rename the 'item' field to 'item_name'
    renamed_fields = []
    for field in events.dtype.names:
        renamed_fields.append('item_name' if field == 'item' else field)
    events.dtype.names = renamed_fields

    if as_df:
        return pd.DataFrame.from_records(events)
    return events
def test_read_categories(self, kind, read_categories):
    """Load ``kind`` for R1111M from bundled files with readers patched.

    ``PathFinder.find``, ``ElectrodeCategoriesReader.load`` and
    ``MontageReader._file_path`` are patched so the load uses the bundled
    category and montage files.
    """
    from cmlreaders.path_finder import PathFinder
    from cmlreaders.readers.electrodes import (
        ElectrodeCategoriesReader, MontageReader
    )

    categories_path = datafile("R1111M_electrode_categories.txt")
    categories = ElectrodeCategoriesReader.fromfile(categories_path)
    montage_path = datafile("R1111M_{}.json".format(kind))

    # Entered in this order; ExitStack unwinds them in reverse.
    context_managers = (
        patched_cmlreader(),
        patch.object(PathFinder, "find", return_value=""),
        patch.object(ElectrodeCategoriesReader, "load",
                     return_value=categories),
        patch.object(MontageReader, "_file_path", montage_path),
    )

    with ExitStack() as stack:
        for manager in context_managers:
            stack.enter_context(manager)

        reader = CMLReader("R1111M", "FR1", 0, 0, 0)
        df = reader.load(kind, read_categories=read_categories)

    self.assert_categories_correct(df, categories, read_categories)