Python preprocess示例，braindecode.preprocessing.preprocess Python示例

示例#1

0

显示文件

def test_load_save_raw_preproc_kwargs(setup_concat_raw_dataset, tmpdir):
    """Check that raw preprocessing kwargs survive a save/load round trip.

    The dataset is preprocessed with a single ``pick_channels`` step, saved
    to ``tmpdir`` and reloaded. Every saved sub-directory must contain a
    ``raw_preproc_kwargs.json`` file, and every reloaded dataset must report
    the applied step in ``raw_preproc_kwargs``.
    """
    concat_raw_dataset = setup_concat_raw_dataset
    pick_c3 = Preprocessor('pick_channels', ch_names=['C3'])
    preprocess(concat_raw_dataset, [pick_c3])
    concat_raw_dataset.save(tmpdir, overwrite=False)

    # One numbered sub-directory per dataset, each holding the kwargs file.
    for idx, _ in enumerate(concat_raw_dataset.datasets):
        kwargs_file = os.path.join(tmpdir, str(idx), 'raw_preproc_kwargs.json')
        assert os.path.exists(kwargs_file)

    expected_kwargs = [('pick_channels', {'ch_names': ['C3']})]
    reloaded = load_concat_dataset(tmpdir, preload=False)
    for ds in reloaded.datasets:
        assert ds.raw_preproc_kwargs == expected_kwargs

示例#2

0

显示文件

def test_load_save_window_preproc_kwargs(setup_concat_windows_dataset, tmpdir):
    """Check that windowing and preprocessing kwargs survive save/load.

    The windows dataset is saved once as-is, then preprocessed with a single
    ``pick_channels`` step and saved again with ``overwrite=True``. After
    reloading, every dataset must expose the original windowing kwargs and
    the applied preprocessing step.
    """
    concat_windows_dataset = setup_concat_windows_dataset

    # First save: the windowing kwargs file must exist for every dataset.
    concat_windows_dataset.save(tmpdir, overwrite=False)
    for idx, _ in enumerate(concat_windows_dataset.datasets):
        assert os.path.exists(
            os.path.join(tmpdir, str(idx), 'window_kwargs.json'))

    # Preprocess the windows and overwrite the files written above.
    pick_cz = Preprocessor('pick_channels', ch_names=['Cz'])
    preprocess(concat_windows_dataset, [pick_cz])
    concat_windows_dataset.save(tmpdir, overwrite=True)
    for idx, _ in enumerate(concat_windows_dataset.datasets):
        ds_dir = os.path.join(tmpdir, str(idx))
        for fname in ('window_kwargs.json', 'window_preproc_kwargs.json'):
            assert os.path.exists(os.path.join(ds_dir, fname))

    expected_window_kwargs = [(
        'create_windows_from_events',
        {
            'infer_mapping': True,
            'infer_window_size_stride': True,
            'trial_start_offset_samples': 0,
            'trial_stop_offset_samples': 0,
            'window_size_samples': None,
            'window_stride_samples': None,
            'drop_last_window': False,
            'mapping': {
                'feet': 0,
                'left_hand': 1,
                'right_hand': 2,
                'tongue': 3,
            },
            'preload': False,
            'drop_bad_windows': True,
            'picks': None,
            'reject': None,
            'flat': None,
            'on_missing': 'error',
            'accepted_bads_ratio': 0.0,
        },
    )]
    expected_preproc_kwargs = [('pick_channels', {'ch_names': ['Cz']})]

    reloaded = load_concat_dataset(tmpdir, preload=False)
    for ds in reloaded.datasets:
        assert ds.window_kwargs == expected_window_kwargs
        assert ds.window_preproc_kwargs == expected_preproc_kwargs

示例#3

0

显示文件

文件： plot_benchmark_preprocessing.py 项目： gemeinl/braindecode-1

def prepare_data(n_recs, save, preload, n_jobs):
    """Load, preprocess and window Sleep Physionet recordings.

    Runs the full pipeline (load, preprocess the continuous data, cut
    fixed-length windows, preprocess the windows) and discards the result.

    Parameters
    ----------
    n_recs : int
        Number of recordings; subjects ``range(n_recs)`` are loaded.
    save : bool
        If true, a temporary directory is created and passed as ``save_dir``
        to both ``preprocess`` calls; otherwise ``save_dir`` is ``None``.
    preload : bool
        Forwarded as ``preload`` to the dataset constructor and the windower.
    n_jobs : int
        Forwarded as ``n_jobs`` to both ``preprocess`` calls and the windower.

    Returns
    -------
    None
    """
    # Keep a handle on the TemporaryDirectory so it can be removed explicitly
    # instead of relying on its finalizer (which emits a ResourceWarning).
    tmp_dir = tempfile.TemporaryDirectory() if save else None
    save_dir = tmp_dir.name if tmp_dir is not None else None

    try:
        # (1) Load the data
        concat_ds = SleepPhysionet(subject_ids=range(n_recs),
                                   recording_ids=[1],
                                   crop_wake_mins=30,
                                   preload=preload)
        sfreq = concat_ds.datasets[0].raw.info['sfreq']

        # (2) Preprocess the continuous data
        preprocessors = [
            Preprocessor('crop', tmin=10),
            Preprocessor('filter', l_freq=None, h_freq=30)
        ]
        preprocess(concat_ds,
                   preprocessors,
                   save_dir=save_dir,
                   overwrite=True,
                   n_jobs=n_jobs)

        # (3) Window the data; both length arguments are 30 s worth of samples
        windows_ds = create_fixed_length_windows(concat_ds,
                                                 0,
                                                 None,
                                                 int(30 * sfreq),
                                                 int(30 * sfreq),
                                                 True,
                                                 preload=preload,
                                                 n_jobs=n_jobs)

        # (4) Preprocess the windowed data
        preprocessors = [Preprocessor(scale, channel_wise=True)]
        preprocess(windows_ds,
                   preprocessors,
                   save_dir=save_dir,
                   overwrite=True,
                   n_jobs=n_jobs)
    finally:
        # Remove the temporary files even if a pipeline step raised.
        if tmp_dir is not None:
            tmp_dir.cleanup()

示例#4

0

显示文件

文件： plot_sleep_staging_eldele2021.py 项目： gemeinl/braindecode-1

# ~~~~~~~~~~~~~
#
# Next, we preprocess the raw data. We convert the data to microvolts and apply
# a lowpass filter.

from braindecode.preprocessing import preprocess, Preprocessor, scale

high_cut_hz = 30  # upper cutoff, passed to the filter step as h_freq

preprocessors = [
    # Multiply the signal by 1e6 (the conversion to microvolts described above)
    Preprocessor(scale, factor=1e6, apply_on_array=True),
    # l_freq=None with h_freq set: the lowpass filter described above
    Preprocessor('filter', l_freq=None, h_freq=high_cut_hz)
]

# Transform the data
preprocess(dataset, preprocessors)

######################################################################
# Extract windows
# ~~~~~~~~~~~~~~~
#
# We extract 30-s windows to be used in the classification task.
# The Eldele2021 model takes a single channel as input. Here, the Fpz-Cz channel
# is used, as it was found to give better performance than the Pz-Oz channel.

from braindecode.preprocessing import create_windows_from_events

mapping = {  # We merge stages 3 and 4 following AASM standards.
    'Sleep stage W': 0,
    'Sleep stage 1': 1,
    'Sleep stage 2': 2,

示例#5

0

显示文件

    Preprocessor('resample', sfreq=sfreq),
]

###############################################################################
# The preprocessing loop works as follows. For every recording, we apply the
# preprocessors as defined above. Then, we update the description of the rec,
# since we have altered the duration, the reference, and the sampling
# frequency. Afterwards, we store each recording to a unique subdirectory that
# is named corresponding to the rec id. To save memory we delete the raw
# dataset after storing. This gives us the option to try different windowing
# parameters after reloading the data.

OUT_PATH = tempfile.mkdtemp()  # please insert actual output directory here
# Split into one subset per recording; the loop below iterates over the
# resulting (split name, subset) pairs.
tuh_splits = tuh.split([[i] for i in range(len(tuh.datasets))])
for rec_i, tuh_subset in tuh_splits.items():
    preprocess(tuh_subset, preprocessors)

    # update description of the recording(s)
    tuh_subset.set_description(
        {
            'sfreq': len(tuh_subset.datasets) * [sfreq],
            'reference': len(tuh_subset.datasets) * ['ar'],
            'n_samples': [len(d) for d in tuh_subset.datasets],
        },
        overwrite=True)

    # create one directory for every recording; exist_ok=True makes this a
    # no-op when the directory already exists, without a separate (and
    # race-prone) existence check
    rec_path = os.path.join(OUT_PATH, str(rec_i))
    os.makedirs(rec_path, exist_ok=True)
    tuh_subset.save(rec_path)

示例#6

0

显示文件

文件： plot_data_augmentation.py 项目： MohammadJavadD/braindecode

# Parameters forwarded to exponential_moving_standardize below
factor_new = 1e-3
init_block_size = 1000

# Steps are listed in the order they are handed to preprocess()
preprocessors = [
    Preprocessor('pick_types', eeg=True, meg=False,
                 stim=False),  # Keep EEG sensors
    Preprocessor(lambda x: x * 1e6),  # Convert from V to uV
    Preprocessor('filter', l_freq=low_cut_hz,
                 h_freq=high_cut_hz),  # Bandpass filter
    Preprocessor(
        exponential_moving_standardize,  # Exponential moving standardization
        factor_new=factor_new,
        init_block_size=init_block_size)
]

# Apply the steps to the dataset
preprocess(dataset, preprocessors)

######################################################################
# Extracting windows
# ~~~~~~~~~~~~~~~~~~
#

from braindecode.preprocessing import create_windows_from_events

# Trial start offset in seconds (negative value)
trial_start_offset_seconds = -0.5
# Use the sampling frequency of the first recording as the reference and
# require every recording in the dataset to share it
sfreq = dataset.datasets[0].raw.info['sfreq']
assert all(ds.raw.info['sfreq'] == sfreq for ds in dataset.datasets)
# Express the offset as a whole number of samples
trial_start_offset_samples = int(trial_start_offset_seconds * sfreq)

示例#7

0

显示文件

文件： test_variable_length_trials_decoding.py 项目： gemeinl/braindecode-1

def test_variable_length_trials_cropped_decoding():
    """Train a cropped classifier on recordings of differing lengths.

    A mock TUH abnormal dataset is made variable-length by cropping its first
    recording, cut into fixed-length windows, and split into train and valid
    sets. A ShallowFBCSPNet is trained for three epochs and the recorded
    train and valid losses are compared with reference values.
    """
    # Seed first: the reference losses below depend on it
    cuda = False
    set_random_seeds(seed=20210726, cuda=cuda)

    # create fake tuh abnormal dataset
    tuh = _TUHAbnormalMock(path='')
    # fake variable length trials by cropping first recording
    splits = tuh.split([[i] for i in range(len(tuh.datasets))])
    preprocess(
        concat_ds=splits['0'],
        preprocessors=[
            Preprocessor('crop', tmax=300),
        ],
    )
    # reassemble all single-recording splits into one dataset
    variable_tuh = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets))])
    # make sure we actually have different length trials
    assert any(np.diff([ds.raw.n_times for ds in variable_tuh.datasets]) != 0)

    # create windows
    variable_tuh_windows = create_fixed_length_windows(
        concat_ds=variable_tuh,
        window_size_samples=1000,
        window_stride_samples=1000,
        drop_last_window=False,
        mapping={
            True: 1,
            False: 0
        },
    )

    # create train and valid set: the last recording is held out for
    # validation, all others are used for training
    splits = variable_tuh_windows.split(
        [[i] for i in range(len(variable_tuh_windows.datasets))])
    variable_tuh_windows_train = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets) - 1)])
    variable_tuh_windows_valid = BaseConcatDataset(
        [splits[str(len(tuh.datasets) - 1)]])
    # fetch only the first window; x.shape[0] is used as in_chans below
    for x, y, ind in variable_tuh_windows_train:
        break
    train_split = predefined_split(variable_tuh_windows_valid)

    # initialize a model
    model = ShallowFBCSPNet(
        in_chans=x.shape[0],
        n_classes=len(tuh.description.pathological.unique()),
    )
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()

    # create and train a classifier
    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=torch.optim.Adam,
        batch_size=32,
        callbacks=['accuracy'],
        train_split=train_split,
    )
    clf.fit(variable_tuh_windows_train, y=None, epochs=3)

    # make sure it does what we expect: one loss value per epoch, compared
    # with loose tolerances (rtol=atol=0.1)
    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array([
            0.689495325088501,
            0.1353449523448944,
            0.006638816092163324,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )

    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array([
            2.925871,
            3.611423,
            4.23494,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )

示例#8

0

显示文件

文件： plot_bcic_iv_4_ecog_trial.py 项目： gemeinl/braindecode-1

#    `torchvision <https://pytorch.org/docs/stable/torchvision/index.html>`__.
#

from braindecode.preprocessing import (exponential_moving_standardize,
                                       preprocess, Preprocessor)

low_cut_hz = 1.  # low cut frequency for filtering
high_cut_hz = 200.  # high cut frequency for filtering, for ECoG higher than for EEG
# Parameters for exponential moving standardization
factor_new = 1e-3
init_block_size = 1000

######################################################################
# We keep only the first 30 seconds of each dataset to limit the time and
# memory needed to run this example. To obtain results on the whole datasets,
# remove the following line.
preprocess(dataset, [Preprocessor('crop', tmin=0, tmax=30)])

######################################################################
# In time series targets setup, targets variables are stored in mne.Raw object as channels
# of type `misc`. Thus those channels have to be selected for further processing. However,
# many mne functions ignore `misc` channels and perform operations only on data channels
# (see https://mne.tools/stable/glossary.html#term-data-channels).
preprocessors = [
    Preprocessor('pick_types', ecog=True, misc=True),
    Preprocessor(lambda x: x / 1e6, picks='ecog'),  # Convert from V to uV
    Preprocessor('filter', l_freq=low_cut_hz,
                 h_freq=high_cut_hz),  # Bandpass filter
    Preprocessor(
        exponential_moving_standardize,  # Exponential moving standardization
        factor_new=factor_new,
        init_block_size=init_block_size,

示例#9

0

显示文件

#

from braindecode.preprocessing import (exponential_moving_standardize,
                                       preprocess, Preprocessor)

low_cut_hz = 1.  # low cut frequency for filtering
high_cut_hz = 200.  # high cut frequency for filtering, for ECoG higher than for EEG
# Parameters for exponential moving standardization
factor_new = 1e-3
init_block_size = 1000

######################################################################
# We keep only the first 30 seconds of the training dataset to limit the time
# and memory needed to run this example. These 30 seconds are split into a
# training part (0-24 s) and a validation part (24-30 s, i.e. only 6 seconds);
# the test dataset is cropped to its first 24 seconds.
# To obtain full results, the whole datasets should be used.
# The validation set is cropped from a deep copy, taken before train_set
# itself is cropped on the next line.
valid_set = preprocess(copy.deepcopy(train_set),
                       [Preprocessor('crop', tmin=24, tmax=30)])
preprocess(train_set, [Preprocessor('crop', tmin=0, tmax=24)])
preprocess(test_set, [Preprocessor('crop', tmin=0, tmax=24)])

######################################################################
# In time series targets setup, targets variables are stored in mne.Raw object as channels
# of type `misc`. Thus those channels have to be selected for further processing. However,
# many mne functions ignore `misc` channels and perform operations only on data channels
# (see https://mne.tools/stable/glossary.html#term-data-channels).
preprocessors = [
    # TODO: ensure that misc is not removed
    Preprocessor('pick_types', ecog=True, misc=True),
    Preprocessor(lambda x: x / 1e6, picks='ecog'),  # Convert from V to uV
    Preprocessor('filter', l_freq=low_cut_hz,
                 h_freq=high_cut_hz),  # Bandpass filter
    Preprocessor(

示例#10

0

显示文件

# Next, we apply the preprocessors on the selected recordings in parallel.
# We additionally use the serialization functionality of
# :func:`braindecode.preprocessing.preprocess` to limit memory usage during
# preprocessing (as each file must be loaded into memory for some of the
# preprocessing steps to work). This also makes it possible to use the lazy
# loading capabilities of :class:`braindecode.datasets.BaseConcatDataset`, as
# the preprocessed data is automatically reloaded with ``preload=False``.
#
# .. note::
#    Here we use ``n_jobs=2`` as the machines the documentation is built on
#    only have two cores. This number should be modified based on the machine
#    that is available for preprocessing.

OUT_PATH = tempfile.mkdtemp()  # please insert actual output directory here
# Apply the preprocessors with N_JOBS workers, passing OUT_PATH as save_dir;
# the return value is kept as tuh_preproc
tuh_preproc = preprocess(concat_ds=tuh,
                         preprocessors=preprocessors,
                         n_jobs=N_JOBS,
                         save_dir=OUT_PATH)

###############################################################################
# We can finally generate compute windows. The resulting dataset is now ready
# to be used for model training.

# Window size and stride, both in samples (equal values here)
window_size_samples = 1000
window_stride_samples = 1000
# generate compute windows here and store them to disk
tuh_windows = create_fixed_length_windows(
    tuh_preproc,
    window_size_samples=window_size_samples,
    window_stride_samples=window_stride_samples,
    drop_last_window=False,
    n_jobs=N_JOBS,

示例#11

0

显示文件

文件： plot_load_save_datasets.py 项目： gemeinl/braindecode-1

from braindecode.datautil import load_concat_dataset
from braindecode.preprocessing import create_windows_from_events


###############################################################################
# First, we load some dataset using MOABB.
dataset = MOABBDataset(
    dataset_name='BNCI2014001',
    subject_ids=[1],
)

###############################################################################
# We can apply preprocessing steps to the dataset. It is also possible to skip
# this step and not apply any preprocessing.
preprocess(
    concat_ds=dataset,
    preprocessors=[Preprocessor(fn='resample', sfreq=10)]
)

###############################################################################
# We save the dataset to an existing directory. It will create a '.fif' file
# for every dataset in the concat dataset. Additionally it will create two
# JSON files, the first holding the description of the dataset, the second
# holding the name of the target. If you want to store to the same directory
# several times, for example due to trying different preprocessing, you can
# choose to overwrite the existing files.

tmpdir = tempfile.mkdtemp()  # write in a temporary directory
dataset.save(
    path=tmpdir,
    overwrite=False,
)

示例#12

0

显示文件

# We can iterate through the dataset, which yields one time point of a
# continuous signal x and a target y (which can be None if targets are not
# defined for the entire continuous signal).
for x, y in dataset:
    print(x.shape, y)
    break  # only inspect the first item

##############################################################################
# We can apply preprocessing transforms that are defined in mne and work
# in-place, such as resampling, bandpass filtering, or electrode selection.
preprocessors = [
    Preprocessor('pick_types', eeg=True, meg=False, stim=True),
    Preprocessor('resample', sfreq=100)
]
# Print the sampling frequency before and after preprocessing
print(dataset.datasets[0].raw.info["sfreq"])
preprocess(dataset, preprocessors)
print(dataset.datasets[0].raw.info["sfreq"])

###############################################################################
# We can easily split the dataset based on a criterion applied to the
# description DataFrame:
subsets = dataset.split("session")
# Show the length of each subset, keyed by subset name
print({subset_name: len(subset) for subset_name, subset in subsets.items()})

###############################################################################
# Next, we use a windower to extract events from the dataset based on events:
windows_dataset = create_windows_from_events(dataset,
                                             trial_start_offset_samples=0,
                                             trial_stop_offset_samples=100,
                                             window_size_samples=400,
                                             window_stride_samples=100,