def test_get_data_index(self, protocol):
    """Check that ``CMLReader.get_data_index`` agrees with ``get_data_index``.

    ``PathFinder.find`` is patched to return a JSON fixture from
    ``cmlreaders.test.data`` so both calls read the same packaged file.

    Parameters
    ----------
    protocol : str
        Protocol name passed to both index loaders. ``"all"`` is served from
        the ``r1.json`` fixture; any other value uses ``<protocol>.json``.
    """
    # Compare by value: identity (`is`) on a str literal is not guaranteed.
    fixture = "r1" if protocol == "all" else protocol
    path = resource_filename("cmlreaders.test.data", fixture + ".json")

    with patch.object(PathFinder, "find", return_value=path):
        ix = CMLReader.get_data_index(protocol)
        # ``all(frame_a == frame_b)`` iterates the column labels of the
        # comparison frame, so it passes regardless of content.
        # ``equals`` compares the values (NaNs in matching positions are
        # treated as equal).
        assert ix.equals(get_data_index(protocol))
def get_monts_and_sess_pairs(subj, exp='TH1'):
    """
    Return the montage/session pairs recorded for one subject in one
    experiment.

    The result is a DataFrame with columns ['montage', 'session'] and a fresh
    0..n-1 index, taken from the "r1" data index. It can be walked with:
    "for (index, (mont, sess)) in get_monts_and_sess_pairs(subj).iterrows():"
    """
    index = get_data_index("r1")
    is_match = (index['subject'] == subj) & (index['experiment'] == exp)
    pairs = index.loc[is_match, ['montage', 'session']]
    # Renumber rows from zero so callers can rely on positional labels.
    return pairs.reset_index(drop=True)
def exp_df(exp='TH1'):
    """
    Return the rows of the "r1" data index that belong to one experiment.

    The returned DataFrame has the columns ['subj', 'montage', 'session',
    'exp'] ('subject' and 'experiment' from the data index are renamed) and
    keeps the row labels of the data index. Use this for iterations.

    Parameters
    ----------
    exp : str
        Experiment name to select (default 'TH1').
    """
    df = get_data_index("r1")
    selected = df[df['experiment'] == exp]
    # Renaming yields a new frame, so nothing is assigned onto a slice and
    # there is no pandas warning to silence. This avoids touching the
    # process-wide warning filters (``warnings.resetwarnings()`` would also
    # discard filters installed by other code).
    renamed = selected.rename(columns={'subject': 'subj',
                                       'experiment': 'exp'})
    return renamed[['subj', 'montage', 'session', 'exp']]
def test_resting_state_connectivity(rhino_root):
    """Compare resting-state connectivity for R1354E against stored results.

    For every FR1 session of the subject, countdown events are converted to
    resting events and EEG is read for them. The resting event offsets are
    checked against a packaged reference array, then the connectivity
    computed from the filtered EEG is compared (on the logit scale, to 3
    decimals) with a reference matrix loaded from ``rhino_root``. All
    intermediate arrays are also dumped to ``test_output.npz``.
    """
    subject = "R1354E"
    index = get_data_index("r1", rhino_root)
    subject_fr1 = index[(index.subject == subject)
                        & (index.experiment == 'FR1')]
    sessions = subject_fr1.session.unique()

    all_events = []
    all_resting = []
    eeg_per_session = []

    for session in sessions:
        reader = CMLReader(subject, 'FR1', session, rootdir=rhino_root)
        events = get_countdown_events(reader)
        sample_rate = reader.load('sources')['sample_rate']
        resting = countdown_to_resting(events, sample_rate)

        all_events.append(events)
        all_resting.append(resting)
        eeg_per_session.append(read_eeg_data(reader, resting, reref=False))

    # Verify that events match Ethan's analysis; his events are ordered in an
    # odd way, so we have to sort them to make sure they match
    ethan = np.load(resource_filename("thetamod.test.data",
                                      "R1354E_events_ethan.npy"))
    resting_offsets = pd.concat(all_resting).eegoffset.values
    assert_equal(sorted(ethan["eegoffset"]), sorted(resting_offsets))

    eegs = TimeSeries.concatenate(eeg_per_session)
    filtered = ButterworthFilter(time_series=eegs.to_ptsa(), ).filter()
    eegs.data = filtered.values
    conn = get_resting_state_connectivity(eegs.to_mne(), eegs.samplerate)

    basename = ('{subject}_baseline3trials_network_theta-alpha.npy'
                .format(subject=subject))
    filename = Path(rhino_root).joinpath('scratch', 'esolo', 'tmi_analysis',
                                         subject, basename)
    reference_conn = np.load(filename)

    np.savez(
        "test_output.npz",
        eeg=eegs.data,
        my_conn=conn,
        ethans_conn=reference_conn,
        events=pd.concat(all_events, ignore_index=True).to_records(),
        resting=pd.concat(all_resting, ignore_index=True).to_records(),
    )

    assert_almost_equal(scipy.special.logit(conn),
                        scipy.special.logit(reference_conn), 3)
def get_cmlevents(subj, montage=None, session=None, exp='TH1'):
    """
    Return the events DataFrame for one subject/session/montage.

    The events are loaded with ``CMLReader`` and extended with two columns,
    'original_session_ID' and 'subject_alias', taken from the "r1" data
    index. Rows whose 'type' is 'SESS_START' are removed.

    This events struct does not include pathInfo, since that isn't recorded
    in the system used by cmlreaders. To get pathInfo you need to use
    `read_path_log`.

    Parameters
    ----------
    subj : str
        Subject code to look up in the data index.
    montage : optional
        Montage to load; defaults to the first montage listed for the
        subject and experiment.
    session : optional
        Session to load; defaults to the first session listed for the
        subject and experiment.
    exp : str
        Experiment name (default 'TH1').

    Raises
    ------
    IndexError
        If the data index has no row for the requested combination.
    """
    # ------Load data index for RAM
    df = get_data_index("r1")

    # ------Specify the df for this subject and exp
    this_df = df[(df['subject'] == subj) & (df['experiment'] == exp)]

    # ------Find out the sessions, localization, and montage for this subject
    if session is None:  # default to first sess
        session = this_df['session'].iloc[0]
    if montage is None:  # default to first mont
        montage = this_df['montage'].iloc[0]

    # ------Get more specific df
    this_specific_df = this_df[(this_df['session'] == session)
                               & (this_df['montage'] == montage)]
    # .iloc[0] is used throughout; this_specific_df is expected to hold a
    # single row.
    loc = int(this_specific_df['localization'].iloc[0])

    # -------Subjs with a montage above 0 have aliases used in log files
    subject_alias = this_specific_df['subject_alias'].iloc[0]

    # ------For some subjs the sess ID system changed over time, and we need
    # to know the original sess ID for certain log files access
    orig_sess_ID = this_specific_df['original_session'].iloc[0]
    if isinstance(orig_sess_ID, str):
        # Parse as float first so that "nan" is representable.
        orig_sess_ID = np.float64(orig_sess_ID)
        # is_integer() is False for NaN/inf, so int() is never called on a
        # value it cannot convert (int(nan) raises ValueError).
        if orig_sess_ID.is_integer():
            orig_sess_ID = int(orig_sess_ID)
    # A missing original ID means the session was never renumbered.
    if orig_sess_ID is None or np.isnan(orig_sess_ID):
        orig_sess_ID = session

    # ------Use CMLReader to read the events structure
    reader = CMLReader(subj, exp, session=session, montage=montage,
                       localization=loc)
    events = reader.load('events')
    events['original_session_ID'] = orig_sess_ID
    events['subject_alias'] = subject_alias

    # remove the unhelpful and inconsistent SESS_START event
    events = events[events['type'] != 'SESS_START']
    return events
def get_eeg_ptsa(which, reader, stim_events, buffer=50, window=900,
                 stim_duration=500):
    """Read EEG around stimulation events using the ptsa readers.

    Parameters
    ----------
    which : str
        "pre" reads the window ending ``buffer`` before each event; "post"
        reads the window starting ``buffer + stim_duration`` after it.
    reader
        Object providing ``rootdir``, ``subject``, ``experiment`` and
        ``session``; used to look up the pairs file in the data index.
    stim_events
        Events passed to ``EEGReader``.
    buffer, window, stim_duration
        Offsets used to build the relative start/stop times handed to
        ``EEGReader``.

    Returns
    -------
    The EEG after bipolar mapping (applied only when the data has no
    'bipolar_pairs' dimension) and a 58-62 band-stop Butterworth filter.

    Raises
    ------
    ValueError
        If ``which`` is neither "pre" nor "post".
    """
    from ptsa.data import readers, filters

    if which == "pre":
        rel_start, rel_stop = -(buffer + window), -buffer
    elif which == "post":
        rel_start = buffer + stim_duration
        rel_stop = rel_start + window
    else:
        raise ValueError("Specify 'pre' or 'post'")

    idx = cmlreaders.get_data_index(rootdir=reader.rootdir)
    in_session = ((idx.subject == reader.subject)
                  & (idx.experiment == reader.experiment)
                  & (idx.session == reader.session))
    pair_file = idx.loc[in_session].pairs.unique()[0]

    talreader = readers.TalReader(filename=pair_file)
    eeg_reader = readers.EEGReader(
        events=stim_events,
        channels=talreader.get_monopolar_channels(),
        start_time=rel_start,
        end_time=rel_stop,
    )
    eeg = eeg_reader.read()

    if 'bipolar_pairs' not in eeg.dims:
        to_bipolar = filters.MonopolarToBipolarMapper(
            time_series=eeg,
            bipolar_pairs=talreader.get_bipolar_pairs(),
        )
        eeg = to_bipolar.filter()

    notch = filters.ButterworthFilter(time_series=eeg, freqs=[58., 62.],
                                      filt_type='stop')
    return notch.filter()
def get_reader(self, subject: Optional[str] = None,
               experiment: Optional[str] = None,
               session: Optional[int] = None) -> CMLReader:
    """Build a ``CMLReader`` for the requested recording.

    Any argument left as ``None`` falls back to the matching attribute on
    this instance. The montage is taken from the first matching row of the
    'r1' data index.
    """
    idx = get_data_index('r1', self.rootdir)

    if subject is None:
        subject = self.subject
    if experiment is None:
        experiment = self.experiment
    if session is None:
        session = self.session
    session = int(session)

    matches = idx.loc[(idx.subject == subject)
                      & (idx.experiment == experiment)
                      & (idx.session == session)]
    montage = matches.montage.unique()[0]

    return CMLReader(subject, experiment, session, montage=montage,
                     rootdir=self.rootdir)
def get_resting_connectivity(self) -> np.ndarray:
    """Compute resting state connectivity.

    EEG for the "resting" events derived from the countdown events is read
    for every FR1 session of ``self.subject``, concatenated, and passed to
    ``connectivity.get_resting_state_connectivity``.

    Returns
    -------
    np.ndarray
        The value returned by ``get_resting_state_connectivity``.

    Raises
    ------
    RuntimeError
        If the data index lists no FR1 session for the subject.
    """
    df = get_data_index(rootdir=self.rootdir)
    sessions = df[(df.subject == self.subject) &
                  (df.experiment == "FR1")].session.unique()

    if len(sessions) == 0:
        raise RuntimeError("No FR1 sessions exist for %s" % self.subject)

    # Read EEG data for "resting" events
    eeg_data = []
    for session in sessions:
        reader = self.get_reader(experiment="FR1", session=session)

        # Load the sources info once and reuse it for both fields.
        sources = reader.load('sources')
        rate = sources['sample_rate']
        # Recordings stored as .h5 are read without rereferencing.
        reref = not sources['name'].endswith('.h5')

        events = connectivity.get_countdown_events(reader)
        resting = connectivity.countdown_to_resting(events, rate)
        eeg_data.append(
            connectivity.read_eeg_data(reader, resting, reref=reref))

    eegs = TimeSeries.concatenate(eeg_data)
    conn = connectivity.get_resting_state_connectivity(
        eegs.to_mne(), eegs.samplerate)
    return conn
import xarray as xr from ptsa.data.filters import ButterworthFilter from ptsa.data.filters import MorletWaveletFilter from ptsa.data.filters import ResampleFilter from ptsa.data.timeseries import TimeSeries from cmlreaders import CMLReader, get_data_index from scipy.stats.mstats import zscore from scipy.io import loadmat from tqdm import tqdm from glob import glob # get the r1 dataframe on import so we don't have to keep doing it try: r1_data = get_data_index("r1") except KeyError: print('r1 protocol file not found') def get_subjs_and_montages(task): """Returns a DataFrame with columns 'subject' and 'montage' listing participants in a given experiment. Parameters ---------- task: str The experiment name (ex: TH1, FR1, ...). Returns ------- pandas.DataFrame
'/Users/tungphan/PycharmProjects/autoencoder_superEEG/autoencoder_single/cmlreaders/' ) import cmlreader rhino_root = "/Volumes/RHINO/" # Instantiate the finder object finder = cml.PathFinder(subject="R1389J", experiment="catFR5", session=1, localization=0, montage=0, rootdir=rhino_root) example_data_types = ['pairs', 'task_events', 'voxel_coordinates'] for data_type in example_data_types: print(finder.find(data_type)) from cmlreaders import get_data_index r1_data = get_data_index(kind='r1', rootdir='/Volumes/RHINO/') r1_data.head() fr1_subjects = r1_data[r1_data['experiment'] == 'FR1']['subject'].unique() fr1_subjects reader = cml.CMLReader(subject="R1389J", experiment="catFR5", session=1, localization=0, montage=0, rootdir=rhino_root)
def calc_subj_pep(subj, elecs=None, method='bip', relstart=300, relstop=1301,
                  freq_specs=(2, 120, 30), percentthresh=.95,
                  numcyclesthresh=3, load_eeg=False, save_eeg=False,
                  save_result=False, plot=False, kind='r1',
                  experiment='FR1', eeg_path='~/', result_path='~/'):
    """
    Compute Pepisode (BOSC) summaries for one subject.

    Inputs:
    subj - subject string
    elecs - list of electrode pairs (strings of the form 'A-B'); when None
        they are looked up in a fixed pairs file
    method - bip or avg depending on referencing scheme
    freq_specs - tuple of (low_freq, high_freq, num_freqs) for background
        fitting in BOSC.
    load_eeg / save_eeg - read previously saved EEG from, or write EEG to,
        ``eeg_path`` (mutually exclusive)
    save_result - write the four result arrays under ``result_path``
    plot - open a figure and plot each session's background fit
    kind, experiment - select sessions from the data index
    eeg_path, result_path - directories for cached EEG and results; a
        leading '~' is expanded to the user's home directory
    relstart, relstop, percentthresh, numcyclesthresh - accepted but not
        referenced in this function

    Returns:
    pep_all - average Pepisode for all words at each frequency
    pep_rec - average Pepisode for recalled words at each frequency
    pep_nrec - average Pepisode for non-recalled words at each frequency
    subj_tscores - t-score at each frequency, comparing rec and nrec across
        events

    Raises:
    ValueError - if both save_eeg and load_eeg are set, or if ``method`` is
        not 'bip'/'avg' while EEG has to be read from the raw recordings
    Exception - if no session could be processed for a pair, or if all
        t-scores are NaN

    ** Note that tscore is not itself meaningful because events are not
    independent. Comparing these tscores across subjects, however, is valid.
    """
    if save_eeg and load_eeg:
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise ValueError('Cannot save and load eeg simultaneously.')
    if not load_eeg and method not in ('bip', 'avg'):
        # Otherwise no branch below would ever build a BOSC object.
        raise ValueError("method must be 'bip' or 'avg'")

    # A literal '~' is not expanded by os.path / os.makedirs.
    eeg_path = os.path.expanduser(eeg_path)
    result_path = os.path.expanduser(result_path)

    print('Subject: ', subj)
    if elecs is None:
        # NOTE(review): the file carries a .csv extension but is read with
        # read_pickle; unpickling is only safe for trusted files - confirm.
        good_subj = pd.read_pickle(
            '/home1/jrudoler/Theta_Project/hippo_subject_pairs.csv')
        elecs = good_subj[good_subj['Subject'] == subj]['hippo_pairs'].iloc[0]

    subj_pepisode = None
    subj_recalled = None
    subj_tscores = None
    if plot:
        plt.figure(figsize=(12, 6))
    lowfreq, highfreq, numfreqs = freq_specs
    print(elecs)

    # The session list does not depend on the electrode pair: look it up once.
    data = cml.get_data_index(kind=kind)
    data = data[data['experiment'] == experiment]
    sessions = data[data['subject'] == subj]['session'].unique()

    for pair_str in elecs:
        chans = pair_str.split('-')
        pepisodes = None  # events, freqs
        recalled = None  # events, freqs
        tscore = None
        for sess in sessions:
            # One cache file per session and pair, shared by load and save.
            eeg_file = os.path.join(
                eeg_path, 'session_' + str(sess) + '_' + pair_str)
            try:
                print('Loading session {} EEG'.format(sess))
                reader = cml.CMLReader(subject=subj, experiment=experiment,
                                       session=sess)
                all_events = reader.load('task_events')
                if not os.path.exists(eeg_path):
                    os.makedirs(eeg_path)
                if load_eeg:
                    eeg = TimeSeries.from_hdf(eeg_file)
                    bosc = P_episode(all_events, eeg,
                                     sr=eeg.samplerate.values,
                                     lowfreq=lowfreq, highfreq=highfreq,
                                     numfreqs=numfreqs)
                elif method == 'bip':
                    pairs = reader.load("pairs")
                    # bipolar eeg
                    bip = reader.load_eeg(
                        scheme=pairs[pairs.label == pair_str])\
                        .to_ptsa().mean(['event', 'channel'])
                    bip = ButterworthFilter(bip, freq_range=[58., 62.],
                                            filt_type='stop',
                                            order=4).filter()
                    print("Applying BOSC method!")
                    if save_eeg:
                        bip.to_hdf(eeg_file)
                    bosc = P_episode(all_events, bip,
                                     sr=bip.samplerate.values,
                                     lowfreq=lowfreq, highfreq=highfreq,
                                     numfreqs=numfreqs)
                else:  # method == 'avg'
                    contacts = reader.load("contacts")
                    # average eeg
                    eeg = reader.load_eeg(
                        scheme=contacts).to_ptsa().mean('event')
                    # all zeros from a broken lead leads to -inf power,
                    # which results in a LinAlg error for log-log fit
                    # TODO: verify this channel exclusion doesn't cause any
                    # problems. Maybe print a message or raise an error?
                    good_chan_mask = ~np.all(eeg.values == 0, axis=1)
                    contacts = contacts[good_chan_mask]
                    eeg = eeg[good_chan_mask, :]
                    in_pair = (contacts.label.str.contains(chans[0])
                               | contacts.label.str.contains(chans[1]))
                    avg = (eeg[in_pair, :]
                           - eeg.mean('channel')).mean('channel')
                    avg = ButterworthFilter(avg, freq_range=[58., 62.],
                                            filt_type='stop',
                                            order=4).filter()
                    if save_eeg:
                        avg.to_hdf(eeg_file)
                    bosc = P_episode(all_events, avg,
                                     sr=avg.samplerate.values,
                                     lowfreq=lowfreq, highfreq=highfreq,
                                     numfreqs=numfreqs)
                if plot:
                    bosc.background_fit(plot_type='session')

                if pepisodes is None:
                    pepisodes = bosc.Pepisode
                    # be careful to only use events from lists that have
                    # eeg data.
                    # [np.isin(bosc.interest_events.list, self.lists)]
                    recalled = bosc.interest_events.recalled.values
                    tscore, _ = scp.ttest_ind(pepisodes[recalled],
                                              pepisodes[~recalled], axis=0)
                elif np.isnan(tscore).all():
                    # NOTE(review): this branch recomputes the t-score from
                    # the data accumulated so far and does not add the
                    # current session's Pepisode/recalled values - confirm
                    # that dropping this session is intended.
                    tscore, _ = scp.ttest_ind(pepisodes[recalled],
                                              pepisodes[~recalled], axis=0)
                else:
                    pepisodes = np.vstack([pepisodes, bosc.Pepisode])
                    recalled = np.hstack(
                        [recalled, bosc.interest_events.recalled.values])
                    t, _ = scp.ttest_ind(pepisodes[recalled],
                                         pepisodes[~recalled], axis=0)
                    tscore = np.vstack([tscore, t])
                print("Proportion recalled:", recalled.mean())
            except IndexError:
                print('IndexError for subject {} session {}'.format(
                    subj, sess))
            except FileNotFoundError:
                print('FileNotFoundError for {} session {}'.format(subj,
                                                                   sess))

        if pepisodes is None:
            raise Exception('No working sessions')

        # Stack this pair's results behind those of the earlier pairs.
        subj_pepisode = pepisodes if subj_pepisode is None else np.dstack(
            [subj_pepisode, pepisodes])
        subj_recalled = recalled if subj_recalled is None else np.vstack(
            [subj_recalled, recalled])
        subj_tscores = tscore if subj_tscores is None else np.vstack(
            [subj_tscores, tscore])

    if np.isnan(subj_tscores).all():
        raise Exception('Too many nan in T-scores. This problem can arise '
                        'when there are no recalled events.')

    if subj_pepisode.ndim > 2:
        # if multiple electrode pairs, average over pairs
        print("Averaging over {} electrodes for subject {}".format(
            subj_pepisode.shape[2], subj))
        subj_pepisode = subj_pepisode.mean(2)
        subj_recalled = subj_recalled.mean(0)
        subj_recalled = subj_recalled.astype(bool)
    if subj_tscores.ndim > 1:
        print(len(sessions), 'sessions')
        subj_tscores = np.nanmean(subj_tscores, axis=0)

    print('{} total events: {} recalled and {} non-recalled'.format(
        len(subj_recalled), sum(subj_recalled), sum(~subj_recalled)))

    pep_rec = subj_pepisode[subj_recalled, :].mean(0)
    pep_nrec = subj_pepisode[~subj_recalled, :].mean(0)
    pep_all = subj_pepisode.mean(0)

    if save_result:
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        np.save(os.path.join(result_path,
                             '{}_all_{}'.format(subj, method)), pep_all)
        np.save(os.path.join(result_path,
                             '{}_rec_{}'.format(subj, method)), pep_rec)
        np.save(os.path.join(result_path,
                             '{}_nrec_{}'.format(subj, method)), pep_nrec)
        np.save(os.path.join(result_path,
                             '{}_tscore_{}'.format(subj, method)),
                subj_tscores)
    return pep_all, pep_rec, pep_nrec, subj_tscores