Exemplo n.º 1
0
    def _load(self, edf_file, mat_file=None):
        """
        Load an EDF recording and the matching Kubios MAT file into this object.

        Reads channel 'ECG I' from `edf_file`, extracts the RR data from the
        MAT file and stores signal, RR information and artefact markers as
        attributes on `self`. Ends by calling `self.save()`.

        :param edf_file: path of the EDF file to load
        :param mat_file: path of the MAT file. If None, the first file matching
                         '<edf name>*.mat' next to the EDF is used; if there is
                         none, the user is asked via `misc.choose_file`.
        :raises FileNotFoundError: if the MAT file cannot be loaded or does not
                                   contain the expected fields. The original
                                   error is attached as `__cause__`.
        """
        if mat_file is None:
            filename = ospath.basename(edf_file)[:-4]
            folder = ospath.dirname(edf_file)
            candidates = ospath.list_files(folder, patterns=f'{filename}*.mat')
            mat_file = candidates[0] if len(candidates) > 0 else None
            if not mat_file or not os.path.exists(mat_file):
                print('matfile {} not found'.format(mat_file))
                mat_file = misc.choose_file(
                    folder, exts='mat',
                    title='Select the corresponding MAT file by Kubios')

        signals, sheader, header = highlevel.read_edf(edf_file, ch_names='ECG I')
        sfreq = sheader[0]['sample_rate']
        data = signals[0].squeeze()
        stime = header['startdate']
        # recording start as seconds since midnight; subtracted from the
        # time stamps read from the MAT file below
        self.starttime = (stime.hour * 60 + stime.minute) * 60 + stime.second
        self.data = data
        self.sfreq = sfreq

        try:
            mat = mat73.loadmat(mat_file, verbose=False)
            hrv_data = mat['Res']['HRV']['Data']
            rr = hrv_data['RR']
            trrs = hrv_data['T_RR'] - self.starttime
            rrorig = hrv_data['T_RRorig'] - self.starttime
            corr = hrv_data['RRcorrtimes'] - self.starttime
            art = mat['Res']['HRV']['TimeVar']['Artifacts']
            # time stamps whose distance to the next one differs from RR
            altered = trrs[np.where(np.diff(trrs) != rr)[0]]
        except Exception as e:
            # keep the exception type callers already see, but do not swallow
            # KeyboardInterrupt/SystemExit and keep the real cause attached
            raise FileNotFoundError('Mat file not found.') from e

        artefacts_file = edf_file[:-4] + '.npy'
        if os.path.exists(artefacts_file):
            self.artefacts = np.load(artefacts_file)
        else:
            # NaN entries are replaced by 99 before thresholding
            art = np.nan_to_num(art, nan=99)
            self.artefacts = np.repeat(art > self.threshold, repeats=2,
                                       axis=0).T.reshape([-1, 2])
            self.detect_flatline()

        # NOTE(review): `art` was rebound in the else-branch above, so NaNs end
        # up as 99 here when no .npy file exists and as 0 otherwise - confirm
        # that this difference is intended.
        self.kubios_art = np.nan_to_num(art.squeeze())
        self.mat = mat
        self.altered = altered.squeeze()
        self.rrorig = rrorig.squeeze()
        self.trrs = trrs.squeeze()
        self.corr = corr.squeeze()

        self.file = edf_file
        self.mat_file = mat_file
        self.artefacts_file = artefacts_file
        self.max_page = len(data) // sfreq // self.interval // self.gridsize

        self.save()
Exemplo n.º 2
0
def _read_digital_channels(edf_file, ch_names):
    """
    Read the given channels of an EDF as digital values.

    Returns (signals, signal_headers, header, ch_names). If `read_edf` hands
    back a list instead of an array, only its first element is kept (as a 2D
    array) and `ch_names` is reduced to its first entry.
    """
    signals, shead, header = read_edf(edf_file,
                                      ch_names=ch_names,
                                      digital=True,
                                      verbose=False)
    if isinstance(signals, list):
        signals = np.atleast_2d(signals[0])
        ch_names = ch_names[0]
    # trick for viewer automatic scaling: the first two samples are
    # overwritten with the 10th and 90th percentile of the signal
    signals[:, 0:2] = np.percentile(signals, 10), np.percentile(signals, 90)
    return signals, shead, header, ch_names


def _signal_attribs(signals, shead, ch_names, unit, dtype, calibrated=False,
                    **extra):
    """
    Build the keyword arguments for `SignalEntry.set_data`.

    With `calibrated=True` lsbValue/baseline are derived from the digital and
    physical range of the first signal header, otherwise they are 1 and 0.
    Additional keyword arguments are added to the result unchanged.
    """
    pmin, pmax = shead[0]['physical_min'], shead[0]['physical_max']
    dmin, dmax = shead[0]['digital_min'], shead[0]['digital_max']
    if calibrated:
        lsb, baseline = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
    else:
        lsb, baseline = 1, 0
    attrib = {
        'data': signals.astype(dtype),
        'sampleRate': shead[0]['sample_rate'],
        'ch_names': ch_names,
        'lsbValue': lsb,
        'baseline': baseline,
        'unit': unit,
        'dmin': dmin,
        'dmax': dmax,
        'pmin': pmin,
        'pmax': pmax,
    }
    attrib.update(extra)
    return attrib


def to_unisens(edf_file,
               unisens_folder,
               overwrite=False,
               tqdm_desc=None,
               skip_exist=False):
    """
    Convert one EDF file and its accompanying files into a Unisens folder.

    The output is written to `unisens_folder/<code>`, where <code> is the EDF
    file name without its extension. Signals (ECG, EEG, EOG, EMG, thorax,
    body), the EDF annotations and every file next to the EDF whose name
    starts with <code> (arousals, hypnograms, Kubios MAT file) are added.

    :param edf_file: path of the EDF file
    :param unisens_folder: parent folder for the new Unisens folder
    :param overwrite: re-create entries that are already present
    :param tqdm_desc: optional callable that receives progress messages
    :param skip_exist: return immediately if the target folder exists already
    :raises Exception: if an accompanying file has an unknown type
    """
    # %% create unisens
    if tqdm_desc is None:
        tqdm_desc = lambda x: None
    dtype = np.int16
    code = ospath.basename(edf_file)[:-4]
    folder = ospath.dirname(edf_file)

    unisens_folder = ospath.join(unisens_folder, code)

    if skip_exist and ospath.isdir(unisens_folder):
        return

    # get all additional files that belong to this EDF
    add_files = ospath.list_files(folder, patterns=code + '*')
    u = Patient(unisens_folder,
                makenew=False,
                autosave=True,
                measurementId=code)
    header = read_edf_header(edf_file)
    all_labels = header['channels']
    u.starttime = header['startdate']
    u.timestampStart = header['startdate'].strftime('%Y-%m-%dT%H:%M:%S')
    u.code = code

    info = misc.get_attribs()[code]
    u.group = info.get('group', 'none')
    u.gender = info.get('gender', 'none')

    u.drug_hrv = info.get('drug_hrv', 0)
    u.drug_sleep = info.get('drug_sleep', 0)

    u.age = info.get('age', -1)
    u.match = info.get('match', '')

    u.channels = str(', '.join(all_labels))
    u.startsec = (u.starttime.hour * 60 +
                  u.starttime.minute) * 60 + u.starttime.second
    u.use_offset = 1

    # if the ECG/EEG is broken, mark it (the csv is read only once)
    discard_rows = list(misc.read_csv(cfg.edfs_discard))
    edfs_ecg_broken = [p[1] for p in discard_rows if p[3] == '1']
    edfs_eeg_broken = [p[1] for p in discard_rows if p[4] == '1']

    # we need to see if the eeg/emg of this file can be used
    # if one of them is broken we also remove its match from analysis
    u.ecg_broken = (code in edfs_ecg_broken) or (u.match in edfs_ecg_broken)
    u.eeg_broken = (code in edfs_eeg_broken) or (u.match in edfs_eeg_broken)

    # `header` is rebound by every channel read below, as the annotations
    # further down are taken from whichever header was read last.

    # %% #### add ECG ##########
    tqdm_desc(f'{code}: Reading ECG')
    if 'ECG' not in u or overwrite:
        signals, shead, header, _ = _read_digital_channels(
            edf_file, ['ECG I'])
        attrib = _signal_attribs(signals, shead, 'ECG', 'mV', dtype,
                                 calibrated=True)
        SignalEntry(id='ECG.bin', parent=u).set_data(**attrib)

        u.sampling_frequency = shead[0]['sample_rate']
        u.duration = len(signals.squeeze()) // shead[0]['sample_rate']
        u.epochs_signals = signals.shape[1] // int(u.sampling_frequency) // 30

    # %%#### add EEG ##########
    tqdm_desc(f'{code}: Reading EEG')
    if 'EEG' not in u or overwrite:
        chs = sleep_utils.infer_eeg_channels(all_labels)
        signals, shead, header, chs = _read_digital_channels(edf_file, chs)
        attrib = _signal_attribs(signals, shead, chs, 'uV', dtype,
                                 calibrated=True, contentClass='EEG')
        SignalEntry(id='EEG.bin', parent=u).set_data(**attrib)

    # %%## add EOG #########
    if 'EOG' not in u or overwrite:
        tqdm_desc(f'{code}: Reading EOG')
        chs = sleep_utils.infer_eog_channels(all_labels)
        signals, shead, header, chs = _read_digital_channels(edf_file, chs)
        attrib = _signal_attribs(signals, shead, chs, 'uV', dtype)
        SignalEntry(id='EOG.bin', parent=u).set_data(**attrib)

    # %%#### add EMG #########
    if 'EMG' not in u or overwrite:
        tqdm_desc(f'{code}: Reading EMG')
        chs = sleep_utils.infer_emg_channels(all_labels)
        if chs != []:  # fix for 888_49272
            signals, shead, header, chs = _read_digital_channels(
                edf_file, chs)
            attrib = _signal_attribs(signals, shead, chs, 'uV', dtype)
            SignalEntry(id='EMG.bin', parent=u).set_data(**attrib)

    # %%add Thorax #########
    if 'thorax' not in u or overwrite:
        tqdm_desc(f'{code}: Reading Thorax')
        signals, shead, header, _ = _read_digital_channels(
            edf_file, ['Thorax'])
        attrib = _signal_attribs(signals, shead, 'thorax', 'uV', dtype)
        SignalEntry(id='thorax.bin', parent=u).set_data(**attrib)

    # %% add Body / position sensor #########
    if ('body' not in u or overwrite) and 'Body' in all_labels:
        tqdm_desc(f'{code}: Reading Body')
        signals, shead, header, _ = _read_digital_channels(edf_file, ['Body'])

        # we have some weird body positions that we cant decode,
        # those recordings are not added
        if np.ptp(signals) < 10:
            comment = 'Lagesensor: 1 = Bauchlage, 2 = aufrecht, 3 = links, 4 = rechts,' \
                      '5 = aufrecht (Kopfstand), 6 = Rückenlage'
            attrib = _signal_attribs(signals, shead, 'body', 'uV', dtype,
                                     comment=comment)
            SignalEntry(id='body.bin', parent=u).set_data(**attrib)

    # %% add annotations #######
    if 'annotations' not in u or overwrite:
        annotations = header['annotations']
        if annotations != []:
            annot_entry = EventEntry('annotations.csv', parent=u)
            # first annotation field is scaled by 1000 to match sampleRate=1000
            annotations = [[int(a[0] * 1000), a[2]] for a in annotations]
            annot_entry.set_data(annotations,
                                 sampleRate=1000,
                                 typeLength=1,
                                 contentClass='Annotation')

    # %%#### add rest #######
    for file in add_files:
        # ignore diagnosis files of StanfordStages
        if file.endswith(
            ('diagnosis.txt', 'hypnodensity.txt', 'hypnogram.txt')):
            pass
        # %% add arousals
        elif file.endswith('_arousal.txt'):
            if 'arousals' in u and not overwrite:
                continue
            lines = misc.read_csv(file, convert_nums=True)

            sdate = u.starttime
            data = []
            # the first four lines of the file are skipped
            for t_arousal, length, _ in lines[4:]:
                # the file only contains a time of day, the date of the
                # recording start is prepended before parsing
                t_arousal = f'{sdate.year}.{sdate.month}.{sdate.day} ' + t_arousal[:8]
                t_arousal = datetime.strptime(t_arousal, '%Y.%m.%d %H:%M:%S')
                epoch = (t_arousal - sdate).seconds // 30
                data.append([epoch, length])

            arousal_event = EventEntry('arousals.csv', parent=u)
            arousal_event.set_data(
                data,
                comment='Arousal appearance epoch, name is lengths in seconds',
                sampleRate=1 / 30,
                contentClass='Arousal',
                typeLength=1)
        # %% add hypnogram (every remaining txt file)
        elif file.endswith('txt'):
            if 'hypnogram' in u and not overwrite:
                continue
            tqdm_desc(f'{code}: Reading Hypnogram')
            hypno = sleep_utils.read_hypnogram(file)
            u.epochs_hypno = len(hypno)
            times = np.arange(len(hypno))
            hypno = np.vstack([times, hypno]).T
            hypno_entry = EventEntry(id='hypnogram.csv', parent=u)
            hypno_entry.set_data(
                hypno,
                comment=f'File: {code}\nSleep stages 30s epochs.',
                sampleRate=1 / 30,
                contentClass='Stage',
                typeLength=1)

        elif file.endswith('.hypno'):
            if 'hypnogram_old' in u and not overwrite:
                continue
            hypno = sleep_utils.read_hypnogram(file)
            if not hasattr(u, 'epochs_hypno'):
                u.epochs_hypno = len(hypno)
            times = np.arange(len(hypno))
            hypno = np.vstack([times, hypno]).T
            hypno_old_entry = EventEntry(id='hypnogram_old.csv', parent=u)
            hypno_old_entry.set_data(
                hypno,
                comment=f'File: {code}\nSleep stages 30s epochs.',
                sampleRate=1 / 30,
                contentClass='Stage',
                typeLength=1)
        # %% add features and kubios
        elif file.endswith('mat'):
            if 'feats.pkl' in u and not overwrite:
                continue
            tqdm_desc(f'{code}: Reading Kubios')
            mat = loadmat(file)
            HRV = mat['Res']['HRV']

            feats_entry = CustomEntry('feats.pkl', parent=u)
            feats_entry.set_data(
                HRV,
                comment='pickle dump of the kubios created features file',
                fileType='pickle')

            wsize = cfg.default_wsize
            step = cfg.default_step
            u.compute_features()
            u.get_artefacts(wsize=wsize, step=step, offset=True)

            # %% add RRi
            tqdm_desc(f'{code}: writing RRi')

            rri_entry = CustomEntry('RRi.pkl', parent=u)
            rri_entry.set_data(
                HRV['Data']['RRi'],
                comment='raw data of RRi, the interpolated RRs at 4hz',
                fileType='pickle')
            rri_entry.sampleRate = 4

        # Importing '.npy' artefact files was removed; artefacts are
        # calculated from the kubios file above instead.
        # NOTE(review): a '.npy' file therefore ends up in the final else
        # branch and raises - confirm that this is intended.
        elif file.endswith(('.edf', 'pkl')):
            pass

        else:
            raise Exception(f'unkown file type: {file}')

    u.save()
Exemplo n.º 3
0
@author: Simon
"""
import config as cfg
import misc
import ospath

# %% print info
if __name__ == '__main__':
    data_folder = cfg.folder_mnc
    fullfiles = ospath.list_files(data_folder, exts=['edf'], subfolders=True)

    info = misc.get_mnc_info()
    # short names: strip the last 9 characters (extension & "-nsrr"), drop
    # the folder part and replace spaces by underscores
    files = [
        ospath.basename(full[:-9]).replace(' ', '_') for full in fullfiles
    ]

    # result collectors
    nt1, hyp, cnt = [], [], []
    missing_file, missing_info, missing_hypno = [], [], []
    missing_info_missing_hypno = []

    for name, full in zip(files, fullfiles):
        folder = ospath.dirname(full)
        # a recording has a hypnogram if an xml with its name lies next to it
        xml_files = ospath.list_files(folder, patterns=f'*{name}*.xml')
        has_hypno = len(xml_files) > 0
        if name.upper() in info:

            item = info[name.upper()].copy()
Exemplo n.º 4
0
def _add_original_channel(u, edf_file, ch_name, label, entry_id, dtype):
    """
    Read one channel as digital values and store it as a SignalEntry of `u`.

    Also sets `u.sampling_frequency` to the sample rate of that channel.
    """
    sig_orig, shead_orig, _ = read_edf(edf_file,
                                       ch_names=ch_name,
                                       verbose=False,
                                       digital=True)
    assert sig_orig.max() <= 32767 and sig_orig.min(
    ) >= -32768, 'min/max exceeds int16'
    pmin, pmax = shead_orig[0]['physical_min'], shead_orig[0]['physical_max']
    dmin, dmax = shead_orig[0]['digital_min'], shead_orig[0]['digital_max']
    lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
    attrib = {
        'data': sig_orig.astype(dtype),
        'sampleRate': shead_orig[0]['sample_rate'],
        'ch_names': label,
        'lsbValue': lsb,
        'baseline': offset,
        'unit': 'mV',
        'dmin': dmin,
        'dmax': dmax,
        'pmin': pmin,
        'pmax': pmax
    }
    u.sampling_frequency = shead_orig[0]['sample_rate']
    SignalEntry(id=entry_id, parent=u).set_data(**attrib)


def to_unisens(edf_file,
               unisens_folder,
               mat_folder,
               overwrite=False,
               skip_exist=False):
    """
    Convert one "-nsrr.edf" file of the MNC dataset into a Unisens folder.

    The file is skipped (a message is printed and None is returned) if no
    info entry, no hypnogram file, not exactly one MAT file or no ECG channel
    is found for it, or if the EDF header cannot be read even after
    `repair_file`.

    :param edf_file: path of the EDF file, expected to end with "-nsrr.edf"
    :param unisens_folder: parent folder for the new Unisens folder
    :param mat_folder: folder that is searched for '<filename>-*.mat'
    :param overwrite: re-create entries that are already present
    :param skip_exist: return None if the target folder exists already
    :return: True after a successful conversion, None if skipped
    :raises AttributeError: if the diagnosis cannot be mapped to a group
    """
    dtype = np.int16
    folder = ospath.dirname(edf_file)
    filename = ospath.basename(
        edf_file)[:-9]  # remove "-nsrr.edf" from filename

    mnc_info = misc.get_mnc_info()
    info_key = filename.upper().replace(' ', '_')
    try:
        attribs = mnc_info[info_key]
    except KeyError:
        print(f'Info for {info_key} not found')
        return

    # get all additional files that belong to this EDF
    patterns = [filename + '*.xml', filename + '*.sta']
    add_files = ospath.list_files(folder, patterns=patterns)
    if len(add_files) == 0:
        print(f'No hypnogram for {filename}, skip')
        return

    # try to find mat files, exactly one match is required
    mat_files = ospath.list_files(mat_folder, patterns=[filename + '-*.mat'])
    if len(mat_files) == 0:
        print(f'No matfile found for {filename}')
        return
    if len(mat_files) > 1:
        print(f'too many matching mat files: {mat_files}')
        return
    mat_file = mat_files[0]

    # get the codified version of this file
    code = misc.codify(filename)
    unisens_folder = ospath.join(unisens_folder, code)

    # if this unisens folder exists, skip if requested
    if skip_exist and ospath.isdir(unisens_folder):
        return

    # now create the meta information for the new file;
    # if the header cannot be read, try once more after repairing the file
    try:
        header = read_edf_header(edf_file)
    except Exception:
        repair_file(edf_file)
        try:
            header = read_edf_header(edf_file)
        except Exception as e:
            print(f'cant load {filename}, broken edf {e}')
            return
    channels = header['channels']
    chs_eeg = [ch for ch in channels if 'EEG' in ch.upper()]
    chs = [ch for ch in channels if 'ECG' in ch.upper()]
    # 'cs_ECG' is only used if there is no other ECG channel
    if 'cs_ECG' in chs and len(chs) > 1:
        chs.remove('cs_ECG')
    if not chs:
        # without this check `chs[0]` below fails with an IndexError after
        # the output folder has already been created
        print(f'No ECG channel found for {filename}, skip')
        return

    # add metadata for this file
    u = Patient(unisens_folder,
                makenew=True,
                autosave=True,
                measurementId=code)
    u.starttime = header['startdate']
    u.timestampStart = header['startdate'].strftime('%Y-%m-%dT%H:%M:%S')
    u.code = code
    u.duration = header['Duration']
    u.dataset = 'mnc'
    u.channels = str(', '.join(channels))
    u.startsec = (u.starttime.hour * 60 +
                  u.starttime.minute) * 60 + u.starttime.second
    # files whose recording starts exactly at midnight are printed
    if u.startsec == 0:
        print(edf_file)
    u.DQ0602 = attribs['DQ0602']
    u.hypocretin = attribs['CSF hypocretin-1']
    u.label = attribs['Label']
    u.cohort = attribs['Cohort']
    u.use_offset = 0
    u.gender = 'unknown'
    u.match = None

    diagnosis = attribs['Diagnosis']
    if 'CONTROL' in diagnosis:
        group = 'control'
    elif 'T1' in diagnosis:
        group = 'nt1'
    elif 'OTHER HYPERSOMNIA' in diagnosis:
        group = 'hypersomnia'
    else:
        raise AttributeError(f'unkown group: {diagnosis} for {filename}')
    u.group = group

    # %% Add the original ECG channel
    if 'ecg' not in u or overwrite:
        _add_original_channel(u, edf_file, chs[0], 'ECG', 'ECG.bin', dtype)

    # %% Add the original EEG channel, if there is one
    if ('eeg' not in u or overwrite) and len(chs_eeg) > 0:
        _add_original_channel(u, edf_file, chs_eeg[0], 'EEG', 'EEG.bin',
                              dtype)

    # %% add annotations
    if 'annotations' not in u or overwrite:
        annotations = header['annotations']
        if annotations != []:
            annot_entry = EventEntry('annotations.csv', parent=u)
            # first annotation field is scaled by 1000 to match sampleRate=1000
            annotations = [[int(a[0] * 1000), a[2]] for a in annotations]
            annot_entry.set_data(annotations,
                                 sampleRate=1000,
                                 typeLength=1,
                                 contentClass='Annotation')

    # %% add hypnogram (add_files is known to be non-empty, see above)
    if 'hypnogram' not in u or overwrite:
        hypnograms = [
            sleep_utils.read_hypnogram(
                file,
                epochlen_infile=30 if file.endswith('annot') else None)
            for file in add_files
        ]

        # all files are read, but only the first hypnogram is stored
        hypno = hypnograms[0]
        u.epochs_hypno = len(hypno)
        times = np.arange(len(hypno))
        hypno = np.vstack([times, hypno]).T
        hypno_entry = EventEntry(id='hypnogram.csv', parent=u)
        hypno_entry.set_data(
            hypno,
            comment=f'File: {code}\nSleep stages 30s epochs.',
            sampleRate=1 / 30,
            contentClass='Stage',
            typeLength=1)

    # %% Add features
    if 'feats.pkl' not in u or overwrite:
        mat = loadmat(mat_file)
        HRV = mat['Res']['HRV']

        feats_entry = CustomEntry('feats.pkl', parent=u)
        feats_entry.set_data(
            HRV,
            comment='pickle dump of the kubios created features file',
            fileType='pickle')

        wsize = cfg.default_wsize
        step = cfg.default_step
        u.compute_features(offset=False)
        u.get_artefacts(wsize=wsize, step=step, offset=False)

        rri_entry = CustomEntry('RRi.pkl', parent=u)
        rri_entry.set_data(
            HRV['Data']['RRi'],
            comment='raw data of RRi, the interpolated RRs at 4hz',
            fileType='pickle')
        rri_entry.sampleRate = 4
    u.save()
    return True
Exemplo n.º 5
0
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 29 15:08:54 2020

This file helps to easily spot files which have the wrong polarity

@author: skjerns
"""
import ospath
import config as cfg
import matplotlib.pyplot as plt
from sleep_utils import read_edf
from tqdm import tqdm
if __name__ == '__main__':
    # For every EDF a 5 second ECG segment from the middle of the recording
    # is plotted and saved as png. Files that already have a png are skipped.
    edf_folder = cfg.folder_edf
    files = ospath.list_files(edf_folder, exts='edf')
    fig, ax = plt.subplots()
    for file in tqdm(files):
        png = 'C:/Users/Simon/Desktop/seg/' + ospath.basename(file) + '.png'
        if ospath.exists(png):
            continue

        data, sig, head = read_edf(file, ch_names=['ECG I'], verbose=False)
        data = data.squeeze()
        sfreq = sig[0]['sample_rate']
        half = len(data) // 2
        # slice bounds must be integers, the sample rate may be a float
        seg = data[half:half + int(5 * sfreq)]
        ax.clear()
        ax.plot(seg)
        fig.savefig(png)
Exemplo n.º 6
0
def anonymize_and_streamline(old_file, target_folder):
    """
    This function loads one EDF file and
    1. removes its birthdate and patient name
    2. renames the channels to standardized channel names
    3. saves the file in another folder with a non-identifiable, codified name
    4. verifies that the new file has the same content as the old one
    5. copies the accompanying txt/dat/mat files under the codified name

    :param old_file: path of the personalized EDF file
    :param target_folder: folder in which the anonymized files are stored
    :return: (old_name, new_name), or None if the file is marked as discarded
    """
    import os  # only needed to clean up the temporary file

    # load the two csvs with the edfs that we dont process and where the ECG is upside down
    pre_coding_discard = [
        line[0] for line in misc.read_csv(cfg.edfs_discard) if line[2] == '1'
    ]
    to_invert = [line[0] for line in misc.read_csv(cfg.edfs_invert)]

    # Here we read the list of controls and patients with their age and gender
    rows = misc.read_csv(cfg.controls_csv)
    rows.extend(misc.read_csv(cfg.patients_csv))
    mappings = {
        name: {
            'gender': gender,
            'age': age
        }
        for name, gender, age, *_ in rows
    }

    # old name is the personalized file without file extension, e.g. thomas_smith(1)
    old_name = ospath.splitext(ospath.basename(old_file))[0]
    # new name is the codified version without extension e.g '123_45678'
    new_name = codify(old_name)

    if old_name in pre_coding_discard:
        print('EDF is marked as corrupt and will be discarded')
        return

    # this is where the anonymized file will be stored
    new_file = ospath.join(target_folder, new_name + '.edf')

    if ospath.exists(new_file):
        print('New file extists already {}'.format(new_file))

    else:
        # anonymize
        print('Writing {} from {}'.format(new_file, old_name))
        assert ospath.isfile(old_file), f'{old_file} does not exist'
        signals, signal_headers, header = sleep_utils.read_edf(old_file,
                                                               digital=True,
                                                               verbose=False)
        # remove patient info
        header['birthdate'] = ''
        header['patientname'] = new_name
        header['patientcode'] = new_name
        header['gender'] = mappings[old_name]['gender']
        header['age'] = mappings[old_name]['age']

        # rename channels to a unified notation, e.g. EKG becomes ECG I
        for shead in signal_headers:
            ch = shead['label']
            if ch in ch_mapping:
                shead['label'] = ch_mapping[ch]

        # Invert the ECG channel if necessary
        if old_name in to_invert:
            for i, sig in enumerate(signals):
                label = signal_headers[i]['label'].lower()
                if label == cfg.ecg_channel.lower():
                    signals[i] = -sig

        # use a temporary file to write and then move it,
        # this avoids half-written files that cannot be read later.
        # TemporaryFile().name is not a usable path on every platform,
        # so a named file is created that survives being closed.
        with tempfile.NamedTemporaryFile(prefix='anonymize',
                                         delete=False) as tmp:
            tmp_name = tmp.name
        try:
            print('Writing tmp for {}'.format(new_file))
            sleep_utils.write_edf(tmp_name,
                                  signals,
                                  signal_headers,
                                  header,
                                  digital=True,
                                  correct=True)

            # verify that contents for both files match exactly
            print('Verifying tmp for {}'.format(new_file))
            # embarrassing hack: the comparison is skipped for this one file
            # because of its dmin/dmax after inverting
            if not old_name == 'B0036':
                sleep_utils.compare_edf(old_file, tmp_name, verbose=False)

            # now we move the tmp file to its new location.
            shutil.move(tmp_name, new_file)
        finally:
            # after a successful move there is nothing left to remove
            if ospath.exists(tmp_name):
                os.remove(tmp_name)

    # also copy additional file information ie hypnograms and kubios files
    old_dir = ospath.dirname(old_file)
    pattern = old_name.replace('_m', '').replace(
        '_w', '')  # remove gender from weitere nt1 patients
    add_files = ospath.list_files(
        old_dir,
        patterns=[f'{pattern}*txt', f'{pattern}*dat', f'{pattern}*mat'])
    for add_file in add_files:
        # e.g. .mat or .npy etc etc
        new_add_file = ospath.join(
            target_folder, ospath.basename(add_file.replace(pattern,
                                                            new_name)))
        # hypnograms will be copied to .hypno
        new_add_file = new_add_file.replace('-Schlafprofil', '')
        new_add_file = new_add_file.replace('_sl', '')
        new_add_file = new_add_file.replace('.txt', '.hypno').replace(
            '.dat', '.hypno')
        # check the final name, otherwise renamed files are always recopied
        if ospath.exists(new_add_file):
            continue
        try:
            shutil.copy(add_file, new_add_file)
        except Exception as e:
            # best effort: a failed copy is reported but does not abort
            print(e)
    return old_name, new_name
Exemplo n.º 7
0
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 29 15:33:03 2020

@author: skjerns
"""
import ospath
import os, sys
import numpy as np
import shutil
import config as cfg
from tqdm import tqdm
from sleep import SleepSet
from pyedflib.highlevel import drop_channels


# Writes an ECG-only copy ('ECG I') of every EDF that has no matching MAT
# file into the subfolder 'edf_no_mat'.
files = ospath.list_files(cfg.folder_edf, exts='edf')
mats = ospath.list_files(cfg.folder_edf, exts='mat', relative=True)
# names of the MAT files without their suffixes; a set keeps the membership
# test in the loop below cheap
mats = {mat.replace('_hrv', '').replace('_small', '').replace('.mat', '') for mat in mats}

ecg_dir = cfg.folder_edf + '/edf_no_mat'
# exist_ok: the script can be run again without failing on the existing folder
os.makedirs(ecg_dir, exist_ok=True)


for file in tqdm(files):
    code = ospath.basename(file)[:-4]
    if code in mats: continue
    drop_channels(os.path.join(cfg.folder_edf, f'{code}.edf'), os.path.join(ecg_dir,f'{code}.edf'), to_keep=['ECG I'])
Exemplo n.º 8
0
@author: skjerns
"""
import config as cfg
import ospath
import sleep_utils
from tqdm import tqdm
import numpy as np 

folder = cfg.folder_edf
files = ospath.list_files(folder, exts=['hypno'])

# load every hypnogram, keyed by its file name
hypnos = {
    ospath.basename(file): sleep_utils.read_hypnogram(file)
    for file in tqdm(files)
}

# res[i, j] is the fraction of equal epochs of hypnogram i and j, compared
# over the length of the shorter one. Pairs of different files that match
# completely are collected in samehypno.
n_hypnos = len(hypnos)
res = np.zeros([n_hypnos, n_hypnos])
samehypno = set()
for i, (name1, hypno1) in enumerate(hypnos.items()):
    for j, (name2, hypno2) in enumerate(hypnos.items()):
        minlen = min(len(hypno1), len(hypno2))
        same = np.mean(hypno1[:minlen] == hypno2[:minlen])
        res[i, j] = same
        if same == 1 and name1 != name2:
            # sorted, so that (a, b) and (b, a) are stored only once
            samehypno.add(tuple(sorted((name1, name2))))
Exemplo n.º 9
0
from sleep import Patient
import sleep_utils
import ospath
import config as cfg
import matplotlib.pyplot as plt
from tqdm import tqdm

if __name__ == '__main__':
    dataset = cfg.folder_edf
    files = ospath.list_files(dataset, exts='edf')
    ax = plt.subplot(1, 1, 1)

    # one spectrogram per entry:
    # (extra arguments for Patient, ufreq, title template, png suffix)
    plots = (
        ({'channel': 'ECG I'}, 10, '{} ECG', '_ecg.png'),
        ({}, 35, '{} EEG', '_eeg.png'),
    )
    for file in tqdm(files):
        for patient_kwargs, ufreq, title, suffix in plots:
            plt.cla()
            sleep = Patient(file, verbose=False, **patient_kwargs)
            sleep_utils.specgram_multitaper(sleep.data,
                                            sleep.sfreq,
                                            ufreq=ufreq,
                                            ax=ax)
            plt.title(title.format(ospath.basename(file)))
            # the png is stored next to the EDF file
            png_file = file[:-4] + suffix
            plt.savefig(png_file)