import logging

import numpy as np

from braindecode.datasets import BaseConcatDataset
from braindecode.datautil.preprocess import (
    preprocess, exponential_moving_demean, MNEPreproc, NumpyPreproc)
from braindecode.datautil.serialization import load_concat_dataset
from braindecode.datautil.windowers import create_fixed_length_windows

log = logging.getLogger(__name__)


def load_train_valid_tuh(n_subjects, n_seconds, ids_to_load):
    path = '/home/schirrmr/data/preproced-tuh/all-sensors-32-hz/'
    log.info("Load concat dataset...")
    dataset = load_concat_dataset(path, preload=False, ids_to_load=ids_to_load)
    whole_train_set = dataset.split('session')['train']
    n_max_minutes = int(np.ceil(n_seconds / 60) + 2)
    sfreq = whole_train_set.datasets[0].raw.info['sfreq']
    log.info("Preprocess concat dataset...")
    preprocess(whole_train_set, [
        MNEPreproc('crop', tmin=0, tmax=n_max_minutes * 60, include_tmax=True),
        NumpyPreproc(fn=lambda x: np.clip(x, -80, 80)),
        NumpyPreproc(fn=lambda x: x / 3),
        NumpyPreproc(fn=exponential_moving_demean,
                     init_block_size=int(sfreq * 10),
                     factor_new=1 / (sfreq * 5)),
    ])
    # Split subjects 75%/25% into train and valid sets
    subject_datasets = whole_train_set.split('subject')
    n_split = int(np.round(n_subjects * 0.75))
    keys = list(subject_datasets.keys())
    train_sets = [d for i in range(n_split)
                  for d in subject_datasets[keys[i]].datasets]
    train_set = BaseConcatDataset(train_sets)
    valid_sets = [d for i in range(n_split, n_subjects)
                  for d in subject_datasets[keys[i]].datasets]
    valid_set = BaseConcatDataset(valid_sets)
    # The recordings were resampled to 32 Hz (see `path`), hence the
    # hard-coded 32 samples per second below
    train_set = create_fixed_length_windows(
        train_set,
        start_offset_samples=60 * 32,
        stop_offset_samples=60 * 32 + 32 * n_seconds,
        preload=True,
        window_size_samples=128,
        window_stride_samples=64,
        drop_last_window=True,
    )
    valid_set = create_fixed_length_windows(
        valid_set,
        start_offset_samples=60 * 32,
        stop_offset_samples=60 * 32 + 32 * n_seconds,
        preload=True,
        window_size_samples=128,
        window_stride_samples=64,
        drop_last_window=True,
    )
    return train_set, valid_set
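# A minimal usage sketch for the loader above; the subject count, duration
# and ids are placeholder values, and `ids_to_load` must match the ids under
# which the preprocessed recordings were stored.
train_set, valid_set = load_train_valid_tuh(
    n_subjects=4, n_seconds=300, ids_to_load=list(range(4)))
print(len(train_set), len(valid_set))  # number of windows in each split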
def windows_concat_ds(base_concat_ds):
    return create_fixed_length_windows(
        base_concat_ds, start_offset_samples=100, stop_offset_samples=0,
        window_size_samples=1000, window_stride_samples=1000,
        drop_last_window=True, mapping=None, preload=True)
def windows_ds():
    raws, description = fetch_data_with_moabb(
        dataset_name='BNCI2014001', subject_ids=4)
    ds = [BaseDataset(raws[i], description.iloc[i]) for i in range(3)]
    concat_ds = BaseConcatDataset(ds)
    windows = create_fixed_length_windows(
        concat_ds=concat_ds, start_offset_samples=0, stop_offset_samples=None,
        window_size_samples=500, window_stride_samples=500,
        drop_last_window=False, preload=False)
    return windows
from braindecode.datasets import TUHAbnormal


def load_example_data(preload, window_len_s, n_subjects=10):
    """Create windowed dataset from subjects of the TUH Abnormal dataset.

    Parameters
    ----------
    preload: bool
        If True, use eager loading, otherwise use lazy loading.
    window_len_s: int
        Window length, in seconds.
    n_subjects: int
        Number of subjects to load.

    Returns
    -------
    windows_ds: BaseConcatDataset
        Windowed data.

    .. warning::
        The recordings from the TUH Abnormal corpus do not all share the same
        sampling rate. The following assumes that the files have already been
        resampled to a common sampling rate.
    """
    subject_ids = list(range(n_subjects))
    ds = TUHAbnormal(
        TUH_PATH, subject_ids=subject_ids, target_name='pathological',
        preload=preload)

    fs = ds.datasets[0].raw.info['sfreq']
    window_len_samples = int(fs * window_len_s)
    window_stride_samples = int(fs * 4)
    # window_stride_samples = int(fs * window_len_s)
    windows_ds = create_fixed_length_windows(
        ds, start_offset_samples=0, stop_offset_samples=None,
        window_size_samples=window_len_samples,
        window_stride_samples=window_stride_samples,
        drop_last_window=True, preload=preload, drop_bad_windows=True)

    # Drop bad epochs
    # XXX: This could be parallelized.
    # XXX: Also, this could be implemented in the Dataset object itself.
    for win_ds in windows_ds.datasets:
        win_ds.windows.drop_bad()
        assert win_ds.windows.preload == preload

    return windows_ds
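# A minimal consumption sketch for the loader above, assuming PyTorch is
# installed and TUH_PATH points at a local copy of the TUH Abnormal corpus;
# batch size, window length and subject count are placeholder values.
from torch.utils.data import DataLoader

example_ds = load_example_data(preload=False, window_len_s=8, n_subjects=2)
loader = DataLoader(example_ds, batch_size=64, shuffle=True)
for X, y, _ in loader:
    print(X.shape, y.shape)  # (batch, n_channels, n_times), (batch,)
    break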
import copy

import pytest

from braindecode.datasets import MOABBDataset
from braindecode.datautil.preprocess import preprocess, zscore, scale, \
    Preprocessor, filterbank, exponential_moving_demean, \
    exponential_moving_standardize, MNEPreproc, NumpyPreproc
from braindecode.datautil.windowers import create_fixed_length_windows

# We can't use fixtures with scope='module' as the dataset objects are
# modified inplace during preprocessing. To avoid the long setup time caused
# by calling the dataset/windowing functions multiple times, we instantiate
# the dataset objects once and deep-copy them in the fixtures.
raw_ds = MOABBDataset(dataset_name='BNCI2014001', subject_ids=[1, 2])
windows_ds = create_fixed_length_windows(
    raw_ds, start_offset_samples=100, stop_offset_samples=None,
    window_size_samples=1000, window_stride_samples=1000,
    drop_last_window=True, mapping=None, preload=True)


@pytest.fixture
def base_concat_ds():
    return copy.deepcopy(raw_ds)


@pytest.fixture
def windows_concat_ds():
    return copy.deepcopy(windows_ds)
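# A hypothetical test consuming the fixtures above, as a sketch of how they
# are used; the assertions are illustrative, not taken from the test suite.
def test_windows_concat_ds_samples(windows_concat_ds):
    # Each sample is a (signal, target, window-indices) triple
    x, y, ind = windows_concat_ds[0]
    assert x.shape[-1] == 1000  # matches window_size_samples above
    assert len(ind) == 3  # i_window_in_trial, i_start_in_trial, i_stop_in_trial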
for rec_i, tuh_subset in tuh_splits.items():
    # implement preprocess for BaseDatasets? Would remove necessity
    # to split above
    preprocess(tuh_subset, preprocessors)

    # update description of the recording(s)
    tuh_subset.description["sfreq"] = len(tuh_subset.datasets) * [sfreq]
    tuh_subset.description["reference"] = len(tuh_subset.datasets) * ['ar']
    tuh_subset.description["n_samples"] = [len(d) for d in tuh_subset.datasets]

    if create_compute_windows:
        # generate compute windows here and store them to disk
        tuh_windows = create_fixed_length_windows(
            tuh_subset, start_offset_samples=0, stop_offset_samples=None,
            window_size_samples=window_size_samples,
            window_stride_samples=window_stride_samples,
            drop_last_window=False)
        # save memory by deleting raw recording
        del tuh_subset
        # store the number of windows required for loading later on
        tuh_windows.description["n_windows"] = [
            len(d) for d in tuh_windows.datasets]

        # create one directory for every recording
        rec_path = os.path.join(OUT_PATH, str(rec_i))
        if not os.path.exists(rec_path):
            os.makedirs(rec_path)
        save_concat_dataset(rec_path, tuh_windows)
        out_i += 1
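# The stored windows can later be reloaded lazily, e.g. for the training
# pipeline above; a sketch assuming the directory layout created in the loop
# (one numbered subdirectory per recording under OUT_PATH):
from braindecode.datautil.serialization import load_concat_dataset

reloaded = load_concat_dataset(OUT_PATH, preload=False, ids_to_load=[0, 1])
print(reloaded.description["n_windows"])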
fig, ax_arr = plt.subplots(1, max_i + 1,
                           figsize=((max_i + 1) * 7, 5),
                           sharex=True, sharey=True)
for i, (x, y, window_ind) in enumerate(windows_ds):
    ax_arr[i].plot(x.T)
    ax_arr[i].set_ylim(-0.0002, 0.0002)
    ax_arr[i].set_title(f"label={y}")
    if i == max_i:
        break

###############################################################################
# Alternatively, we can create evenly spaced ("sliding") windows using a
# different windower.

sliding_windows_ds = create_fixed_length_windows(
    ds, start_offset_samples=0, stop_offset_samples=0,
    window_size_samples=1200, window_stride_samples=1000,
    drop_last_window=False)

print(len(sliding_windows_ds))
for x, y, window_ind in sliding_windows_ds:
    print(x.shape, y, window_ind)
    break

###############################################################################
# Transforms can also be applied on windows in the same way as shown
# above on continuous data:


def crop_windows(windows, start_offset_samples, stop_offset_samples):
    fs = windows.info["sfreq"]
# The same sliding-window example written against an older braindecode API,
# in which windows were still called "supercrops":
fig, ax_arr = plt.subplots(1, max_i + 1,
                           figsize=((max_i + 1) * 7, 5),
                           sharex=True, sharey=True)
for i, (x, y, supercrop_ind) in enumerate(windows_ds):
    ax_arr[i].plot(x.T)
    ax_arr[i].set_ylim(-0.0002, 0.0002)
    ax_arr[i].set_title(f"label={y}")
    if i == max_i:
        break

###############################################################################
# Alternatively, we can create evenly spaced ("sliding") windows using a
# different windower.

sliding_windows_ds = create_fixed_length_windows(
    ds, start_offset_samples=0, stop_offset_samples=None,
    supercrop_size_samples=1200, supercrop_stride_samples=1000,
    drop_samples=False)

print(len(sliding_windows_ds))
for x, y, supercrop_ind in sliding_windows_ds:
    print(x.shape, y, supercrop_ind)
    break

###############################################################################
# Transforms can also be applied on supercrops/windows in the same way as
# shown above on continuous data:


def crop_windows(windows, start_offset_samples, stop_offset_samples):
    fs = windows.info["sfreq"]
import logging

import mne
import numpy as np
import pandas as pd

from braindecode.datasets import BaseConcatDataset, BaseDataset
from braindecode.datautil.windowers import create_fixed_length_windows

log = logging.getLogger(__name__)


def create_from_X_y(X, y, drop_last_window, sfreq=None, ch_names=None,
                    window_size_samples=None, window_stride_samples=None):
    """Create a BaseConcatDataset of WindowsDatasets from X and y to be used
    for decoding with skorch and braindecode, where X is a list of pre-cut
    trials and y are corresponding targets.

    Parameters
    ----------
    X: array-like
        list of pre-cut trials as n_trials x n_channels x n_times
    y: array-like
        targets corresponding to the trials
    drop_last_window: bool
        whether to drop the last window; if False, a final window that
        overlaps the previous one is added when the windows do not equally
        divide the continuous signal
    sfreq: float
        common sampling frequency of all trials
    ch_names: array-like
        channel names of the trials
    window_size_samples: int
        window size
    window_stride_samples: int
        stride between windows

    Returns
    -------
    windows_datasets: BaseConcatDataset
        X and y transformed to a dataset format that is compatible with skorch
        and braindecode
    """
    n_samples_per_x = []
    base_datasets = []
    if sfreq is None:
        sfreq = 100
        log.info("No sampling frequency given, set to 100 Hz.")
    if ch_names is None:
        ch_names = [str(i) for i in range(X.shape[1])]
        log.info(f"No channel names given, set to 0-{X.shape[1] - 1}.")

    for x, target in zip(X, y):
        n_samples_per_x.append(x.shape[1])
        info = mne.create_info(ch_names=ch_names, sfreq=sfreq)
        raw = mne.io.RawArray(x, info)
        base_dataset = BaseDataset(raw, pd.Series({"target": target}),
                                   target_name="target")
        base_datasets.append(base_dataset)
    base_datasets = BaseConcatDataset(base_datasets)

    if window_size_samples is None and window_stride_samples is None:
        if not len(np.unique(n_samples_per_x)) == 1:
            raise ValueError("if 'window_size_samples' and "
                             "'window_stride_samples' are None, "
                             "all trials have to have the same length")
        window_size_samples = n_samples_per_x[0]
        window_stride_samples = n_samples_per_x[0]
    windows_datasets = create_fixed_length_windows(
        base_datasets,
        start_offset_samples=0,
        stop_offset_samples=0,
        window_size_samples=window_size_samples,
        window_stride_samples=window_stride_samples,
        drop_last_window=drop_last_window)
    return windows_datasets
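# A minimal usage sketch for `create_from_X_y` with randomly generated
# trials; the shapes, sampling rate and channel names are placeholder values.
X = np.random.randn(5, 3, 200)  # 5 trials, 3 channels, 200 samples each
y = np.array([0, 1, 0, 1, 0])
windows_ds = create_from_X_y(
    X, y, drop_last_window=False, sfreq=100., ch_names=['C3', 'Cz', 'C4'])
# Without explicit window sizes, each window spans one whole trial
x0, y0, ind0 = windows_ds[0]
print(x0.shape, y0)  # (3, 200) and the first trial's target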