Пример #1
0
def read_edf(path, downsample):
    """Read data from a European Data Format (edf) file.

    Use phypno class for reading EDF files:
        http: // phypno.readthedocs.io / api / phypno.ioeeg.edf.html

    Parameters
    ----------
    path: str
        Filename(with full path) to EDF file
    downsample : int
        Down-sampling frequency.

    Returns
    -------
    sf : int
        The sampling frequency.
    data : array_like
        The data organised as well(n_channels, n_points)
    chan : list
        The list of channel's names.
    n : int
        Number of points in the original data
    start_time : array_like
        Starting time of the recording (hh:mm:ss)
    annotations : array_like
        Array of annotations.
    """
    assert os.path.isfile(path)

    from ..utils.sleep.edf import Edf

    edf = Edf(path)

    # Return header informations
    _, start_time, sf, chan, n_samples, _ = edf.return_hdr()
    start_time = start_time.time()

    # Keep only data channels (e.g excludes marker chan)
    freqs = np.unique(
        edf.hdr['n_samples_per_record']) / edf.hdr['record_length']
    sf = freqs.max()

    if len(freqs) != 1:
        bad_chans = np.where(edf.hdr['n_samples_per_record'] < sf)
        chan = np.delete(chan, bad_chans)

    # Load all samples of selected channels
    np.seterr(divide='ignore', invalid='ignore')
    data = edf.return_dat(chan, 0, n_samples)

    # Get original signal length :
    n = data.shape[1]

    # Get down-sample factor :
    sf = float(sf)
    chan = list(chan)
    dsf, downsample = get_dsf(downsample, sf)

    return sf, downsample, dsf, data[:, ::dsf], chan, n, start_time, None
Пример #2
0
 def test_get_dsf(self):
     """Test function get_dsf."""
     assert get_dsf(100, 1000.) == (10, 100.)
     assert get_dsf(100, None) == (1, 100.)
Пример #3
0
def read_elan(path, downsample):
    """Read data from a ELAN (eeg) file.

    Elan format specs: http: // elan.lyon.inserm.fr/

    Parameters
    ----------
    path : str
        Filename(with full path) to Elan .eeg file
    downsample : int
        Down-sampling frequency.

    Returns
    -------
    sf : int
        The sampling frequency.
    data : array_like
        The data organised as well(n_channels, n_points)
    chan : list
        The list of channel's names.
    n : int
        Number of samples before down-sampling.
    start_time : array_like
        Starting time of the recording (hh:mm:ss)
    annotations : array_like
        Array of annotations.
    """
    header = path + '.ent'

    assert os.path.isfile(path)
    assert os.path.isfile(header)

    # Read .ent file
    ent = np.genfromtxt(header,
                        delimiter='\n',
                        usecols=[0],
                        dtype=None,
                        skip_header=0,
                        encoding='utf-8')

    # eeg file version
    eeg_version = ent[0]

    if eeg_version == 'V2':
        nb_oct = 2
        formread = '>i2'
    elif eeg_version == 'V3':
        nb_oct = 4
        formread = '>i4'

    # Sampling rate
    sf = 1. / float(ent[8])

    # Record starting time
    if ent[4] != "No time":
        hour, minutes, sec = ent[4].split(':')
        start_time = datetime.time(int(hour), int(minutes), int(sec))
        day, month, year = ent[3].split(':')
    else:
        start_time = datetime.time(0, 0, 0)

    # Channels
    nb_chan = np.int(ent[9])
    nb_chan = nb_chan

    # Last 2 channels do not contain data
    nb_chan_data = nb_chan - 2
    chan_list = slice(nb_chan_data)
    chan = ent[10:10 + nb_chan_data]

    # Gain
    gain = np.zeros(nb_chan)
    offset1 = 9 + 3 * nb_chan
    offset2 = 9 + 4 * nb_chan
    offset3 = 9 + 5 * nb_chan
    offset4 = 9 + 6 * nb_chan

    for i in np.arange(1, nb_chan + 1):

        min_an = float(ent[offset1 + i])
        max_an = float(ent[offset2 + i])
        min_num = float(ent[offset3 + i])
        max_num = float(ent[offset4 + i])

        gain[i - 1] = (max_an - min_an) / (max_num - min_num)
    if gain.dtype != np.float32:
        gain = gain.astype(np.float32, copy=False)

    # Load memmap
    nb_bytes = os.path.getsize(path)
    nb_samples = int(nb_bytes / (nb_oct * nb_chan))

    m_raw = np.memmap(path,
                      dtype=formread,
                      mode='r',
                      shape=(nb_chan, nb_samples),
                      order={'F'})

    # Get original signal length :
    n = m_raw.shape[1]

    # Get down-sample factor :
    sf = float(sf)
    chan = list(chan)
    dsf, downsample = get_dsf(downsample, sf)

    # Multiply by gain :
    data = m_raw[chan_list, ::dsf] * \
        gain[chan_list][..., np.newaxis]

    return sf, downsample, dsf, data, chan, n, start_time, None
Пример #4
0
def read_bva(path, downsample, read_markers=False):
    """Read data from a BrainVision (*.vhdr) file.

    Poor man's version of https: // gist.github.com / breuderink / 6266871

    Assumes that data are saved with the following parameters:
        - Data format: Binary
        - Orientation: Multiplexed
        - Format: int16

    Parameters
    ----------
    path : str
        Filename(with full path) to .vhdr file. Data file must be in the
        same directory.
    downsample : int
        Down-sampling frequency.
    read_markers : bool | False
        Import markers from the .vmrk files as annotations

    Returns
    -------
    sf : float
        The sampling frequency.
    data : array_like
        The data organised as well(n_channels, n_points)
    chan : list
        The list of channel's names.
    n : int
        Number of points before down-sampling.
    start_time : array_like
        Starting time of the recording (hh:mm:ss)
    annotations : array_like
        Array of annotations.
    """
    import re

    assert os.path.isfile(path)

    # Read header
    ent = np.genfromtxt(path,
                        delimiter='\n',
                        usecols=[0],
                        dtype=None,
                        skip_header=0,
                        encoding='utf-8')

    for item in ent:
        if 'DataFile=' in item:
            data_file = item.split('=')[1]
            data_path = os.path.join(os.path.dirname(path), data_file)
            assert os.path.isfile(data_path)
        elif 'MarkerFile=' in item:
            marker_file = item.split('=')[1]
            marker_path = os.path.join(os.path.dirname(path), marker_file)
        elif 'NumberOfChannels=' in item:
            n_chan = int(re.findall('\d+', item)[0])
        elif 'SamplingInterval=' in item:
            si = float(re.findall("[-+]?\d*\.\d+|\d+", item)[0])
            sf = 1 / (si * 0.000001)
        elif 'DataFormat' in item:
            data_format = item.split('=')[1]
        elif 'BinaryFormat' in item:
            binary_format = item.split('=')[1]
        elif 'DataOrientation' in item:
            data_orient = item.split('=')[1]

    # Check binary format
    assert "BINARY" in data_format
    assert "INT_16" in binary_format
    assert "MULTIPLEXED" in data_orient

    # Extract channel labels and resolution
    start_label = np.array(np.where(np.char.find(ent, 'Ch1=') == 0)).min()
    chan = {}
    resolution = np.empty(shape=n_chan)

    for i, j in enumerate(range(start_label, start_label + n_chan)):
        chan[i] = re.split('\W+', ent[j])[1]
        resolution[i] = float(ent[j].split(",")[2])

    chan = np.array(list(chan.values())).flatten()

    # Read marker file (if present) to extract recording time
    if os.path.isfile(marker_path):
        vmrk = np.genfromtxt(marker_path,
                             delimiter='\n',
                             usecols=[0],
                             dtype=None,
                             skip_header=0,
                             encoding='utf-8')

        # Read start-time
        for item in vmrk:
            if 'New Segment' in item:
                st = re.split('\W+', item)[-1]
                start_time = datetime.time(int(st[8:10]), int(st[10:12]),
                                           int(st[12:14]))
                break
            else:
                start_time = datetime.time(0, 0, 0)

        # Read markers
        if read_markers:
            onsets = np.array([], dtype=float)
            durations = np.array([], dtype=float)
            descriptions = np.array([], dtype=str)
            for item in vmrk:
                if 'Mk' in item and ';' not in item:
                    onsets = np.append(
                        onsets, int(re.sub(r'\s', '', item).split(',')[2]))
                    durations = np.append(
                        durations, int(re.sub(r'\s', '', item).split(',')[3]))
                    descriptions = np.append(
                        descriptions,
                        re.sub(r'\s', '', item).split(',')[1])
                    anot = np.c_[onsets, durations, descriptions]
        else:
            anot = None

    with io.open(data_path, 'rb') as f:
        raw = f.read()
        size = int(len(raw) / 2)

        ints = np.ndarray((n_chan, int(size / n_chan)),
                          dtype='<i2',
                          order='F',
                          buffer=raw)

        data = np.float32(np.diag(resolution)).dot(ints)

    # Get original signal length :
    n = data.shape[1]

    # Get down-sample factor :
    sf = float(sf)
    chan = list(chan)
    dsf, downsample = get_dsf(downsample, sf)

    return sf, downsample, dsf, data[:, ::dsf], chan, n, start_time, anot
Пример #5
0
    def __init__(self, data, channels, sf, hypno, href, preload, use_mne,
                 downsample, kwargs_mne, annotations):
        """Init."""
        # ========================== LOAD DATA ==========================
        # Dialog window if data is None :
        if data is None:
            data = dialog_load(
                self, "Open dataset", '',
                "Any EEG files (*.vhdr *.edf *.gdf *.bdf *.eeg "
                "*.egi *.mff *.cnt *.trc *.set *.rec);;"
                "BrainVision (*.vhdr);;EDF (*.edf);;"
                "GDF (*.gdf);;BDF (*.bdf);;Elan (*.eeg);;"
                "EGI (*.egi);;MFF (*.mff);;CNT (*.cnt);;"
                "Micromed (*.trc);;EEGLab (*.set);;REC (*.rec)")
            upath = os.path.split(data)[0]
        else:
            upath = ''

        if isinstance(data, str):  # file is defined
            # ---------- USE SLEEP or MNE ----------
            # Find file extension :
            file, ext = get_file_ext(data)
            # Force to use MNE if preload is False :
            use_mne = True if not preload else use_mne
            # Get if the file has to be loaded using Sleep or MNE python :
            sleep_ext = ['.eeg', '.vhdr', '.edf', '.trc', '.rec']
            use_mne = True if ext not in sleep_ext else use_mne

            if use_mne:
                is_mne_installed(raise_error=True)

            # ---------- LOAD THE FILE ----------
            if use_mne:  # Load using MNE functions
                logger.debug("Load file using MNE-python")
                kwargs_mne['preload'] = preload
                args = mne_switch(file, ext, downsample, **kwargs_mne)
            else:  # Load using Sleep functions
                logger.debug("Load file using Sleep")
                args = sleep_switch(file, ext, downsample)
            # Get output arguments :
            (sf, downsample, dsf, data, channels, n, offset, annot) = args
            info = ("Data successfully loaded (%s):"
                    "\n- Sampling-frequency : %.2fHz"
                    "\n- Number of time points (before down-sampling): %i"
                    "\n- Down-sampling frequency : %.2fHz"
                    "\n- Number of time points (after down-sampling): %i"
                    "\n- Number of channels : %i")
            n_channels, n_pts_after = data.shape
            logger.info(
                info %
                (file + ext, sf, n, downsample, n_pts_after, n_channels))
            PROFILER("Data file loaded", level=1)

        elif isinstance(data, np.ndarray):  # array of data is defined
            if not isinstance(sf, (int, float)):
                raise ValueError("When passing raw data, the sampling "
                                 "frequency parameter, sf, must either be an "
                                 "integer or a float.")
            file = annot = None
            offset = datetime.time(0, 0, 0)
            dsf, downsample = get_dsf(downsample, sf)
            n = data.shape[1]
            data = data[:, ::dsf]
        else:
            raise IOError("The data should either be a string which refer to "
                          "the path of a file or an array of raw data of shape"
                          " (n_electrodes, n_time_points).")

        # Keep variables :
        self._file = file
        self._annot_file = np.c_[merge_annotations(annotations, annot)]
        self._N = n
        self._dsf = dsf
        self._sfori = float(sf)
        self._toffset = offset.hour * 3600. + offset.minute * 60. + \
            offset.second
        time = np.arange(n)[::dsf] / sf
        self._sf = float(downsample) if downsample is not None else float(sf)

        # ========================== LOAD HYPNOGRAM ==========================
        # Dialog window for hypnogram :
        if hypno is False:
            hypno = None
        elif hypno is None:
            hypno = dialog_load(
                self, "Open hypnogram", upath,
                "Text file (*.txt);;Elan (*.hyp);;"
                "CSV file (*.csv);;EDF+ file(*.edf);"
                ";All files (*.*)")
            hypno = None if hypno == '' else hypno
        if isinstance(hypno, np.ndarray):  # array_like
            if len(hypno) == n:
                hypno = hypno[::dsf]
            else:
                raise ValueError("Then length of the hypnogram must be the "
                                 "same as raw data")
        if isinstance(hypno, str):  # (*.hyp / *.txt / *.csv)
            hypno, _ = read_hypno(hypno, time=time, datafile=file)
            # Oversample then downsample :
            hypno = oversample_hypno(hypno, self._N)[::dsf]
            PROFILER("Hypnogram file loaded", level=1)

        # ========================== CHECKING ==========================
        # ---------- DATA ----------
        # Check data shape :
        if data.ndim is not 2:
            raise ValueError("The data must be a 2D array")
        nchan, npts = data.shape

        # ---------- CHANNELS ----------
        if (channels is None) or (len(channels) != nchan):
            warn("The number of channels must be " + str(nchan) + ". Default "
                 "channel names will be used instead.")
            channels = ['chan' + str(k) for k in range(nchan)]
        # Clean channel names :
        patterns = ['eeg', 'EEG', 'ref']
        chanc = []
        for c in channels:
            # Remove informations after . :
            c = c.split('.')[0]
            c = c.split('-')[0]
            # Exclude patterns :
            for i in patterns:
                c = c.replace(i, '')
            # Remove space :
            c = c.replace(' ', '')
            c = c.strip()
            chanc.append(c)

        # ---------- STAGE ORDER ----------
        # href checking :
        absref = ['art', 'wake', 'n1', 'n2', 'n3', 'rem']
        absint = [-1, 0, 1, 2, 3, 4]
        if href is None:
            href = absref
        elif (href is not None) and isinstance(href, list):
            # Force lower case :
            href = [k.lower() for k in href]
            # Check that all stage are present :
            for k in absref:
                if k not in href:
                    raise ValueError(k + " not found in href.")
            # Force capitalize :
            href = [k.capitalize() for k in href]
            href[href.index('Rem')] = 'REM'
        else:
            raise ValueError("The href parameter must be a list of string and"
                             " must contain 'art', 'wake', 'n1', 'n2', 'n3' "
                             "and 'rem'")
        # Conversion variable :
        absref = ['Art', 'Wake', 'N1', 'N2', 'N3', 'REM']
        conv = {absint[absref.index(k)]: absint[i] for i, k in enumerate(href)}

        # ---------- HYPNOGRAM ----------
        if hypno is None:
            hypno = np.zeros((npts, ), dtype=np.float32)
        else:
            n = len(hypno)
            # Check hypno values :
            if (hypno.min() < -1.) or (hypno.max() > 4) or (n != npts):
                warn("\nHypnogram values must be comprised between -1 and 4 "
                     "(see Iber et al. 2007). Use:\n-1 -> Art (optional)\n 0 "
                     "-> Wake\n 1 -> N1\n 2 -> N2\n 3 -> N4\n 4 -> REM\nEmpty "
                     "hypnogram will be used instead")
                hypno = np.zeros((npts, ), dtype=np.float32)

        # ---------- SCALING ----------
        # Assume that the inter-quartile amplitude of EEG data is ~50 uV
        iqr_chan = iqr(data[:, :int(data.shape[1] / 4)], axis=-1)
        bad_iqr = iqr_chan < 1.

        if np.any(bad_iqr):
            mult_fact = np.zeros_like(iqr_chan)
            iqr_chan[iqr_chan == 0.] = 1.
            mult_fact[bad_iqr] = np.floor(np.log10(50. / iqr_chan[bad_iqr]))
            data *= 10.**mult_fact[..., np.newaxis]
            warn("Wrong channel data amplitude. ")

        # ---------- CONVERSION ----------=
        # Convert data and hypno to be contiguous and float 32 (for vispy):
        self._data = vispy_array(data)
        self._hypno = vispy_array(hypno)
        self._time = vispy_array(time)
        self._channels = chanc
        self._href = href
        self._hconv = conv
        PROFILER("Check data", level=1)
Пример #6
0
def read_trc(path, downsample):
    """Read data from a Micromed (trc) file (version 4).

    Poor man's version of micromedio.py from Neo package
    (https://pythonhosted.org/neo/)

    Parameters
    ----------
    path : str
        Filename(with full path) to .trc file
    downsample : int
        Down-sampling frequency.

    Returns
    -------
    sf : float
        The sampling frequency.
    downsample : float
        The downsampling frequency
    data : array_like
        The data organised as well(n_channels, n_points)
    chan : list
        The list of channel's names.
    n : int
        Number of samples before down-sampling.
    start_time : array_like
        Starting time of the recording (hh:mm:ss)
    annotations : array_like
        Array of annotations.
    """
    import struct

    def read_f(f, fmt):
        return struct.unpack(fmt, f.read(struct.calcsize(fmt)))

    with io.open(path, 'rb') as f:
        # Read header
        f.seek(175, 0)
        header_version, = read_f(f, 'b')
        assert header_version == 4

        f.seek(138, 0)
        data_start_offset, n_chan, _, sf, nbytes = read_f(f, 'IHHHH')

        f.seek(128, 0)
        day, month, year, hour, minute, sec = read_f(f, 'bbbbbb')
        start_time = datetime.time(hour, minute, sec)

        # Raw data
        f.seek(data_start_offset, 0)
        m_raw = np.fromstring(f.read(), dtype='u' + str(nbytes))
        m_raw = m_raw.reshape((int(m_raw.size / n_chan), n_chan)).transpose()

        # Read label / gain
        gain = []
        chan = []
        logical_ground = []
        data = np.empty(shape=m_raw.shape, dtype=np.float32)

        f.seek(176, 0)
        zone_names = ['ORDER', 'LABCOD']
        zones = {}
        for zname in zone_names:
            zname2, pos, length = read_f(f, '8sII')
            zones[zname] = zname2, pos, length

        zname2, pos, length = zones['ORDER']
        f.seek(pos, 0)
        code = np.fromfile(f, dtype='u2', count=n_chan)

        for c in range(n_chan):
            zname2, pos, length = zones['LABCOD']
            f.seek(pos + code[c] * 128 + 2, 0)

            chan = np.append(chan, f.read(6).decode('utf-8').strip())
            logical_min, logical_max, logic_ground_chan, physical_min, \
                physical_max = read_f(f, 'iiiii')

            logical_ground = np.append(logical_ground, logic_ground_chan)

            gain = np.append(
                gain,
                float(physical_max - physical_min) /
                float(logical_max - logical_min + 1))

    # Multiply by gain
    m_raw = m_raw - logical_ground[:, np.newaxis]
    data = m_raw * gain[:, np.newaxis].astype(np.float32)

    # Get original signal length :
    n = data.shape[1]

    # Get down-sample factor :
    sf = float(sf)
    chan = list(chan)
    dsf, downsample = get_dsf(downsample, sf)

    return sf, downsample, dsf, data[:, ::dsf], chan, n, start_time, None
Пример #7
0
 def test_get_dsf(self):
     """Test function get_dsf."""
     assert get_dsf(100, 1000.) == (10, 100.)
     assert get_dsf(100, None) == (1, 100.)