def test_load_save_raw_preproc_kwargs(setup_concat_raw_dataset, tmpdir):
    """Raw preprocessing kwargs are written on save and restored on load."""
    raw_ds = setup_concat_raw_dataset
    steps = [Preprocessor('pick_channels', ch_names=['C3'])]
    preprocess(raw_ds, steps)
    raw_ds.save(tmpdir, overwrite=False)

    # Every dataset index gets its own sub-directory holding the kwargs file.
    n_datasets = len(raw_ds.datasets)
    for idx in range(n_datasets):
        kwargs_file = os.path.join(
            tmpdir, str(idx), 'raw_preproc_kwargs.json')
        assert os.path.exists(kwargs_file)

    expected_kwargs = [('pick_channels', {'ch_names': ['C3']})]
    reloaded = load_concat_dataset(tmpdir, preload=False)
    for ds in reloaded.datasets:
        assert ds.raw_preproc_kwargs == expected_kwargs
def test_load_save_window_preproc_kwargs(setup_concat_windows_dataset, tmpdir):
    """Window kwargs and window preprocessing kwargs round-trip through disk.

    The dataset is saved once as-is, then preprocessed and saved again with
    ``overwrite=True``, and finally reloaded to compare the stored kwargs.
    """
    windows_ds = setup_concat_windows_dataset

    # First save: the windowing kwargs file must exist for every dataset.
    windows_ds.save(tmpdir, overwrite=False)
    for idx in range(len(windows_ds.datasets)):
        rec_dir = os.path.join(tmpdir, str(idx))
        assert os.path.exists(os.path.join(rec_dir, 'window_kwargs.json'))

    # Second save, after preprocessing: both kwargs files must exist.
    channel_pick = [Preprocessor('pick_channels', ch_names=['Cz'])]
    preprocess(windows_ds, channel_pick)
    windows_ds.save(tmpdir, overwrite=True)
    for idx in range(len(windows_ds.datasets)):
        rec_dir = os.path.join(tmpdir, str(idx))
        for fname in ('window_kwargs.json', 'window_preproc_kwargs.json'):
            assert os.path.exists(os.path.join(rec_dir, fname))

    expected_window_kwargs = [
        ('create_windows_from_events', {
            'infer_mapping': True,
            'infer_window_size_stride': True,
            'trial_start_offset_samples': 0,
            'trial_stop_offset_samples': 0,
            'window_size_samples': None,
            'window_stride_samples': None,
            'drop_last_window': False,
            'mapping': {
                'feet': 0,
                'left_hand': 1,
                'right_hand': 2,
                'tongue': 3,
            },
            'preload': False,
            'drop_bad_windows': True,
            'picks': None,
            'reject': None,
            'flat': None,
            'on_missing': 'error',
            'accepted_bads_ratio': 0.0,
        }),
    ]
    expected_preproc_kwargs = [('pick_channels', {'ch_names': ['Cz']})]

    reloaded = load_concat_dataset(tmpdir, preload=False)
    for ds in reloaded.datasets:
        assert ds.window_kwargs == expected_window_kwargs
        assert ds.window_preproc_kwargs == expected_preproc_kwargs
def prepare_data(n_recs, save, preload, n_jobs):
    """Load, preprocess and window SleepPhysionet recordings.

    Runs the full pipeline (load, continuous preprocessing, fixed-length
    windowing, window preprocessing) and returns nothing.

    Parameters
    ----------
    n_recs :
        Number of recordings to load; passed as ``subject_ids=range(n_recs)``.
    save :
        If truthy, a temporary directory is created and passed as
        ``save_dir`` to both ``preprocess`` calls. The directory is removed
        before the function returns. Otherwise ``save_dir`` is ``None``.
    preload :
        Forwarded as ``preload`` to ``SleepPhysionet`` and
        ``create_fixed_length_windows``.
    n_jobs :
        Forwarded as ``n_jobs`` to ``preprocess`` and
        ``create_fixed_length_windows``.
    """
    tmp_dir = tempfile.TemporaryDirectory() if save else None
    save_dir = None if tmp_dir is None else tmp_dir.name

    try:
        # (1) Load the data
        concat_ds = SleepPhysionet(
            subject_ids=range(n_recs), recording_ids=[1], crop_wake_mins=30,
            preload=preload)
        sfreq = concat_ds.datasets[0].raw.info['sfreq']

        # (2) Preprocess the continuous data
        preprocessors = [
            Preprocessor('crop', tmin=10),
            Preprocessor('filter', l_freq=None, h_freq=30)
        ]
        preprocess(concat_ds, preprocessors, save_dir=save_dir,
                   overwrite=True, n_jobs=n_jobs)

        # (3) Window the data; the same sample count (30 * sfreq) is used
        # for the two consecutive size arguments.
        window_len_samples = int(30 * sfreq)
        windows_ds = create_fixed_length_windows(
            concat_ds, 0, None, window_len_samples, window_len_samples, True,
            preload=preload, n_jobs=n_jobs)

        # Preprocess the windowed data
        preprocessors = [Preprocessor(scale, channel_wise=True)]
        preprocess(windows_ds, preprocessors, save_dir=save_dir,
                   overwrite=True, n_jobs=n_jobs)
    finally:
        # Remove the temporary directory deterministically instead of relying
        # on the TemporaryDirectory finalizer (which emits a ResourceWarning
        # and may be delayed when an exception keeps the frame alive).
        if tmp_dir is not None:
            tmp_dir.cleanup()
# ~~~~~~~~~~~~~ # # Next, we preprocess the raw data. We convert the data to microvolts and apply # a lowpass filter. from braindecode.preprocessing import preprocess, Preprocessor, scale high_cut_hz = 30 preprocessors = [ Preprocessor(scale, factor=1e6, apply_on_array=True), Preprocessor('filter', l_freq=None, h_freq=high_cut_hz) ] # Transform the data preprocess(dataset, preprocessors) ###################################################################### # Extract windows # ~~~~~~~~~~~~~~~ # # We extract 30-s windows to be used in the classification task. # The Eldele2021 model takes a single channel as input. Here, the Fpz-Cz channel is used as it # was found to give better performance than using the Pz-Oz channel from braindecode.preprocessing import create_windows_from_events mapping = { # We merge stages 3 and 4 following AASM standards. 'Sleep stage W': 0, 'Sleep stage 1': 1, 'Sleep stage 2': 2,
Preprocessor('resample', sfreq=sfreq), ] ############################################################################### # The preprocessing loop works as follows. For every recording, we apply the # preprocessors as defined above. Then, we update the description of the rec, # since we have altered the duration, the reference, and the sampling # frequency. Afterwards, we store each recording to a unique subdirectory that # is named corresponding to the rec id. To save memory we delete the raw # dataset after storing. This gives us the option to try different windowing # parameters after reloading the data. OUT_PATH = tempfile.mkdtemp() # please insert actual output directory here tuh_splits = tuh.split([[i] for i in range(len(tuh.datasets))]) for rec_i, tuh_subset in tuh_splits.items(): preprocess(tuh_subset, preprocessors) # update description of the recording(s) tuh_subset.set_description( { 'sfreq': len(tuh_subset.datasets) * [sfreq], 'reference': len(tuh_subset.datasets) * ['ar'], 'n_samples': [len(d) for d in tuh_subset.datasets], }, overwrite=True) # create one directory for every recording rec_path = os.path.join(OUT_PATH, str(rec_i)) if not os.path.exists(rec_path): os.makedirs(rec_path) tuh_subset.save(rec_path)
# Parameters passed to exponential_moving_standardize below
factor_new = 1e-3
init_block_size = 1000

# Preprocessing steps handed to `preprocess` for the whole dataset
preprocessors = [
    Preprocessor('pick_types', eeg=True, meg=False, stim=False),  # Keep EEG sensors
    Preprocessor(lambda x: x * 1e6),  # Convert from V to uV
    Preprocessor('filter', l_freq=low_cut_hz, h_freq=high_cut_hz),  # Bandpass filter
    Preprocessor(
        exponential_moving_standardize,  # Exponential moving standardization
        factor_new=factor_new, init_block_size=init_block_size)
]

preprocess(dataset, preprocessors)

######################################################################
# Extracting windows
# ~~~~~~~~~~~~~~~~~~
#

from braindecode.preprocessing import create_windows_from_events

trial_start_offset_seconds = -0.5
# Extract the sampling frequency and check that it is the same in all datasets
sfreq = dataset.datasets[0].raw.info['sfreq']
assert all([ds.raw.info['sfreq'] == sfreq for ds in dataset.datasets])
# Calculate the trial start offset in samples (int() truncates the product).
trial_start_offset_samples = int(trial_start_offset_seconds * sfreq)
def test_variable_length_trials_cropped_decoding():
    """Cropped decoding on recordings of unequal length matches reference losses.

    The first recording of a mocked TUH Abnormal dataset is cropped so that
    recordings differ in length. A dense-prediction ShallowFBCSPNet is then
    trained for three epochs and its train/valid losses are compared to
    stored values within a tolerance.
    """
    cuda = False
    set_random_seeds(seed=20210726, cuda=cuda)

    # create fake tuh abnormal dataset
    tuh = _TUHAbnormalMock(path='')
    n_recordings = len(tuh.datasets)

    # fake variable length trials by cropping only the first recording
    per_recording = tuh.split([[rec_i] for rec_i in range(n_recordings)])
    crop_first = [Preprocessor('crop', tmax=300)]
    preprocess(concat_ds=per_recording['0'], preprocessors=crop_first)
    variable_tuh = BaseConcatDataset(
        [per_recording[str(rec_i)] for rec_i in range(n_recordings)])

    # make sure we actually have different length trials
    n_times_per_rec = [ds.raw.n_times for ds in variable_tuh.datasets]
    assert any(np.diff(n_times_per_rec) != 0)

    # create windows
    variable_tuh_windows = create_fixed_length_windows(
        concat_ds=variable_tuh,
        window_size_samples=1000,
        window_stride_samples=1000,
        drop_last_window=False,
        mapping={True: 1, False: 0},
    )

    # train on all recordings but the last one, validate on the last one
    n_window_datasets = len(variable_tuh_windows.datasets)
    per_window_ds = variable_tuh_windows.split(
        [[ds_i] for ds_i in range(n_window_datasets)])
    last_rec = n_recordings - 1
    train_set = BaseConcatDataset(
        [per_window_ds[str(ds_i)] for ds_i in range(last_rec)])
    valid_set = BaseConcatDataset([per_window_ds[str(last_rec)]])

    # grab the first training example; only x is used (channel count)
    for x, y, ind in train_set:
        break
    train_split = predefined_split(valid_set)

    # initialize a model
    n_classes = len(tuh.description.pathological.unique())
    model = ShallowFBCSPNet(
        in_chans=x.shape[0],
        n_classes=n_classes,
    )
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()

    # create and train a classifier
    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=torch.optim.Adam,
        batch_size=32,
        callbacks=['accuracy'],
        train_split=train_split,
    )
    clf.fit(train_set, y=None, epochs=3)

    # make sure it does what we expect
    expected_train_loss = np.array([
        0.689495325088501,
        0.1353449523448944,
        0.006638816092163324,
    ])
    expected_valid_loss = np.array([
        2.925871,
        3.611423,
        4.23494,
    ])
    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        expected_train_loss,
        rtol=1e-1,
        atol=1e-1,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        expected_valid_loss,
        rtol=1e-1,
        atol=1e-1,
    )
# `torchvision <https://pytorch.org/docs/stable/torchvision/index.html>`__. # from braindecode.preprocessing import (exponential_moving_standardize, preprocess, Preprocessor) low_cut_hz = 1. # low cut frequency for filtering high_cut_hz = 200. # high cut frequency for filtering, for ECoG higher than for EEG # Parameters for exponential moving standardization factor_new = 1e-3 init_block_size = 1000 ###################################################################### # We select only first 30 seconds from each dataset to limit time and memory # to run this example. To obtain results on the whole datasets you should remove this line. preprocess(dataset, [Preprocessor('crop', tmin=0, tmax=30)]) ###################################################################### # In time series targets setup, targets variables are stored in mne.Raw object as channels # of type `misc`. Thus those channels have to be selected for further processing. However, # many mne functions ignore `misc` channels and perform operations only on data channels # (see https://mne.tools/stable/glossary.html#term-data-channels). preprocessors = [ Preprocessor('pick_types', ecog=True, misc=True), Preprocessor(lambda x: x / 1e6, picks='ecog'), # Convert from V to uV Preprocessor('filter', l_freq=low_cut_hz, h_freq=high_cut_hz), # Bandpass filter Preprocessor( exponential_moving_standardize, # Exponential moving standardization factor_new=factor_new, init_block_size=init_block_size,
# from braindecode.preprocessing import (exponential_moving_standardize, preprocess, Preprocessor) low_cut_hz = 1. # low cut frequency for filtering high_cut_hz = 200. # high cut frequency for filtering, for ECoG higher than for EEG # Parameters for exponential moving standardization factor_new = 1e-3 init_block_size = 1000 ###################################################################### # We select only first 30 seconds from the training dataset to limit time and memory # to run this example. We split training dataset into train and validation (only 6 seconds). # To obtain full results whole datasets should be used. valid_set = preprocess(copy.deepcopy(train_set), [Preprocessor('crop', tmin=24, tmax=30)]) preprocess(train_set, [Preprocessor('crop', tmin=0, tmax=24)]) preprocess(test_set, [Preprocessor('crop', tmin=0, tmax=24)]) ###################################################################### # In time series targets setup, targets variables are stored in mne.Raw object as channels # of type `misc`. Thus those channels have to be selected for further processing. However, # many mne functions ignore `misc` channels and perform operations only on data channels # (see https://mne.tools/stable/glossary.html#term-data-channels). preprocessors = [ # TODO: ensure that misc is not removed Preprocessor('pick_types', ecog=True, misc=True), Preprocessor(lambda x: x / 1e6, picks='ecog'), # Convert from V to uV Preprocessor('filter', l_freq=low_cut_hz, h_freq=high_cut_hz), # Bandpass filter Preprocessor(
# Next, we apply the preprocessors on the selected recordings in parallel. # We additionally use the serialization functionality of # :func:`braindecode.preprocessing.preprocess` to limit memory usage during # preprocessing (as each file must be loaded into memory for some of the # preprocessing steps to work). This also makes it possible to use the lazy # loading capabilities of :class:`braindecode.datasets.BaseConcatDataset`, as # the preprocessed data is automatically reloaded with ``preload=False``. # # .. note:: # Here we use ``n_jobs=2`` as the machines the documentation is built on # only have two cores. This number should be modified based on the machine # that is available for preprocessing. OUT_PATH = tempfile.mkdtemp() # please insert actual output directory here tuh_preproc = preprocess(concat_ds=tuh, preprocessors=preprocessors, n_jobs=N_JOBS, save_dir=OUT_PATH) ############################################################################### # We can finally generate compute windows. The resulting dataset is now ready # to be used for model training. window_size_samples = 1000 window_stride_samples = 1000 # generate compute windows here and store them to disk tuh_windows = create_fixed_length_windows( tuh_preproc, window_size_samples=window_size_samples, window_stride_samples=window_stride_samples, drop_last_window=False, n_jobs=N_JOBS,
from braindecode.datautil import load_concat_dataset from braindecode.preprocessing import create_windows_from_events ############################################################################### # First, we load some dataset using MOABB. dataset = MOABBDataset( dataset_name='BNCI2014001', subject_ids=[1], ) ############################################################################### # We can apply preprocessing steps to the dataset. It is also possible to skip # this step and not apply any preprocessing. preprocess( concat_ds=dataset, preprocessors=[Preprocessor(fn='resample', sfreq=10)] ) ############################################################################### # We save the dataset to an existing directory. It will create a '.fif' file # for every dataset in the concat dataset. Additionally it will create two # JSON files, the first holding the description of the dataset, the second # holding the name of the target. If you want to store to the same directory # several times, for example due to trying different preprocessing, you can # choose to overwrite the existing files. tmpdir = tempfile.mkdtemp() # write in a temporary directory dataset.save( path=tmpdir, overwrite=False, )
# We can iterate through ds which yields one time point of a continuous signal x, # and a target y (which can be None if targets are not defined for the entire # continuous signal). for x, y in dataset: print(x.shape, y) break ############################################################################## # We can apply preprocessing transforms that are defined in mne and work # in-place, such as resampling, bandpass filtering, or electrode selection. preprocessors = [ Preprocessor('pick_types', eeg=True, meg=False, stim=True), Preprocessor('resample', sfreq=100) ] print(dataset.datasets[0].raw.info["sfreq"]) preprocess(dataset, preprocessors) print(dataset.datasets[0].raw.info["sfreq"]) ############################################################################### # We can easily split ds based on a criterion applied to the description # DataFrame: subsets = dataset.split("session") print({subset_name: len(subset) for subset_name, subset in subsets.items()}) ############################################################################### # Next, we use a windower to extract windows from the dataset based on events: windows_dataset = create_windows_from_events(dataset, trial_start_offset_samples=0, trial_stop_offset_samples=100, window_size_samples=400, window_stride_samples=100,