def write_hypnogram(hypno, filename, seconds_per_annotation=30,
                    comment=None, overwrite=False):
    """
    Save a hypnogram based on annotations per epochs in VisBrain style
    (ie. The exact onset of each sleep stage is annotated in time space.)
    This format is recommended for saving hypnograms as it avoids ambiguity.

    :param hypno: The hypnogram either as list or np.array
    :param filename: where to save the data
    :param seconds_per_annotation: How many seconds each annotation contains
    :param comment: Add a comment to the beginning of the file
    :param overwrite: overwrite file?
    :returns: True after the file has been written
    :raises AssertionError: if filename already exists and overwrite is False
    """
    # Raised explicitly rather than through an `assert` statement so that the
    # guard is still enforced when Python runs with -O (asserts are stripped).
    if ospath.exists(filename) and not overwrite:
        raise AssertionError('File already exists, no overwrite')

    # repeat every annotation once for each second it covers
    hypno = np.repeat(hypno, seconds_per_annotation)
    hypno_str = hypno2time(hypno)

    if comment is not None:
        # every line of the comment is prefixed with '*'
        comment = comment.replace('\n', '\n*')
        hypno_str = '*' + comment + '\n' + hypno_str
        # collapse empty lines, e.g. from a comment ending in a newline
        hypno_str = hypno_str.replace('\n\n', '\n')

    with open(filename, 'w') as f:
        f.write(hypno_str)
    return True
import numpy as np from tqdm import tqdm from pyedflib import highlevel import ospath import sleep_utils from sleep import SleepSet import config as cfg from datetime import datetime, timedelta if __name__ == '__main__': files = ospath.list_files(cfg.folder_edf, exts='edf') for file in files: hypno = file[:-4] + '.txt' if not ospath.exists(hypno): continue with open(file, 'rb') as f: c = f.read(184)[179:].decode().split('.') edf_start = datetime(2020, 10, 10, 10, int(c[0]), int(c[1])) edf_startsec = edf_start.minute * 60 + edf_start.second edf_reclen = int(f.read(60)[50:].decode()) edf_end = edf_start + timedelta(seconds=edf_reclen) with open(hypno, 'r') as f: c = f.readlines() hypno_start = datetime(2020, 10, 10, 10, int(c[7][3:5]), int(c[7][6:8])) hypno_startsec = hypno_start.minute * 60 + hypno_start.second hypno_end = datetime(2020, 10, 10, 10, int(c[-1][3:5]),
import ospath
import shutil
import sleep_utils
import numpy as np
from sklearn.metrics import cohen_kappa_score
import matplotlib.pyplot as plt

# Compare every .hypno file with the .txt hypnogram of the same base name and
# collect, per pair, the fraction of matching entries and Cohen's kappa.
# NOTE(review): the loop nesting was reconstructed from a flattened source;
# everything after the existence check is assumed to depend on it -- confirm.
files = ospath.list_files(cfg.folder_edf, exts=['hypno'])
accuracy, kohen = [], []
a, b = [], []
for file in files:
    txt_file = file.replace('.hypno', '.txt')
    if not ospath.exists(txt_file):
        continue

    hypno1 = sleep_utils.read_hypnogram(file)
    hypno2 = sleep_utils.read_hypnogram(txt_file)

    # cut both hypnograms to the length of the shorter one
    minlen = min(len(hypno1), len(hypno2))
    hypno1, hypno2 = hypno1[:minlen], hypno2[:minlen]

    accuracy.append(np.mean(hypno1 == hypno2))
    kohen.append(cohen_kappa_score(hypno1, hypno2))

    hypno1[0] = 5
    labels = {0: 'W', 4: 'REM', 1: 'S1', 2: 'S2', 3: 'SWS', 5: 'A'}

    # only pairs with an agreement of at most 0.65 are collected in a / b
    if accuracy[-1] > 0.65:
        continue
    a.append(accuracy[-1])
    b.append(kohen[-1])
@author: skjerns """ import os from sleep import SleepSet import sleep_utils import numpy as np import ospath import config as cfg import matplotlib.pyplot as plt from tqdm import tqdm from multiprocessing import Process, Queue if __name__ == '__main__': ss = SleepSet(cfg.folder_unisens) ss = ss.filter( lambda x: x.duration < 60 * 60 * 11) # only less than 14 hours ss = ss.filter( lambda x: x.group in ['control', 'nt1']) # only less than 14 hours ss = ss.filter(lambda x: np.mean(x.get_artefacts(only_sleeptime=True)) < 0.25) #only take patients with artefact percentage <25% for p in tqdm(ss[:250]): dataset = p.get_attrib('dataset', 'none') saveas = ospath.join(cfg.documents, 'plots', p.group, dataset, p.code + '.jpg') if ospath.exists(saveas): continue p.spectogram(channels=['ecg', 'RRi'], ufreq=2) os.makedirs(os.path.dirname(saveas), exist_ok=True) plt.savefig(saveas) plt.close('all')
exist_ok=True) os.makedirs(ospath.join(cfg.folder_edf, 'set2', 'not_matched'), exist_ok=True) # copy the files into nt1:matched set1 and nt1:matched set2 respectively for p_orig, p_coded, gender, age, c_name, c_coded, c_gender, c_age, diff in tqdm( matchings): if int(diff) > cfg.max_age_diff: break for patient, p_coded1 in set1: if patient == p_orig: assert p_coded == p_coded1 # sanity check old_location_nt1 = ospath.join(cfg.folder_edf, p_coded + '.edf') new_location_nt1 = ospath.join(cfg.folder_edf, 'set1', p_coded + '.edf') if not ospath.exists(new_location_nt1): shutil.copy(old_location_nt1, new_location_nt1) old_location_cnt = ospath.join(cfg.folder_edf, c_coded + '.edf') new_location_cnt = ospath.join(cfg.folder_edf.strip(), 'set1', c_coded.strip() + '.edf') if not ospath.exists(new_location_cnt): shutil.copy(old_location_cnt, new_location_cnt) for patient, p_coded1 in set2: if patient == p_orig: assert p_coded == p_coded1 # sanity check old_location_nt1 = ospath.join(cfg.folder_edf, p_coded + '.edf') new_location_nt1 = ospath.join(cfg.folder_edf, 'set2',
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 29 15:08:54 2020

Plots a short ECG segment from the middle of every EDF file and saves it
as a PNG, which helps to easily spot files which have the wrong polarity

@author: skjerns
"""
import ospath
import config as cfg
import matplotlib.pyplot as plt
from sleep_utils import read_edf
from tqdm import tqdm

if __name__ == '__main__':
    edf_folder = cfg.folder_edf
    edf_files = ospath.list_files(edf_folder, exts='edf')

    # a single axis is reused (cleared) for every file
    _, axis = plt.subplots()

    for edf_file in tqdm(edf_files):
        out_png = 'C:/Users/Simon/Desktop/seg/' + ospath.basename(edf_file) + '.png'
        # images that were already created are not plotted again
        if ospath.exists(out_png):
            continue

        signals, signal_headers, _header = read_edf(edf_file,
                                                    ch_names=['ECG I'],
                                                    verbose=False)
        ecg = signals.squeeze()
        sfreq = signal_headers[0]['sample_rate']

        # take 5 * sfreq samples starting at the middle of the recording
        start = len(ecg) // 2
        stop = start + 5 * sfreq
        segment = ecg[start:stop]

        axis.clear()
        axis.plot(segment)
        plt.savefig(out_png)
def anonymize_and_streamline(old_file, target_folder):
    """
    Anonymize one EDF file and store it under a coded name.

    This function loads the EDF and
        1. removes the birthdate and patient name
        2. renames the channels to standardized channel names
        3. saves the file in target_folder under a non-identifiable name
        4. verifies that the new file has the same content as the old one
    Afterwards the accompanying txt/dat/mat files found next to the EDF are
    copied to target_folder under the coded name as well.

    :param old_file: path of the EDF file that should be anonymized
    :param target_folder: folder in which the anonymized files are stored
    :returns: tuple (old_name, new_name), or None if the file is listed
              as discarded
    """
    # load the two csvs with the edfs that we dont process
    # and where the ECG is upside down
    pre_coding_discard = [
        line[0] for line in misc.read_csv(cfg.edfs_discard) if line[2] == '1'
    ]
    to_invert = [line[0] for line in misc.read_csv(cfg.edfs_invert)]

    # Here we read the list of controls and patients with their age and gender
    mappings = misc.read_csv(cfg.controls_csv)
    mappings.extend(misc.read_csv(cfg.patients_csv))
    mappings = {
        name: {'gender': gender, 'age': age}
        for name, gender, age, *_ in mappings
    }

    # old name is the personalized file without file extension,
    # e.g. thomas_smith(1)
    old_name = ospath.splitext(ospath.basename(old_file))[0]
    # new name is the codified version without extension e.g '123_45678'
    new_name = codify(old_name)

    if old_name in pre_coding_discard:
        print('EDF is marked as corrupt and will be discarded')
        return

    # this is where the anonymized file will be stored
    new_file = ospath.join(target_folder, new_name + '.edf')

    if ospath.exists(new_file):
        print('New file extists already {}'.format(new_file))
    else:
        # anonymize
        print('Writing {} from {}'.format(new_file, old_name))
        assert ospath.isfile(old_file), f'{old_file} does not exist'
        signals, signal_headers, header = sleep_utils.read_edf(
            old_file, digital=True, verbose=False)

        # remove patient info
        header['birthdate'] = ''
        header['patientname'] = new_name
        header['patientcode'] = new_name
        header['gender'] = mappings[old_name]['gender']
        header['age'] = mappings[old_name]['age']

        # rename channels to a unified notation, e.g. EKG becomes ECG I
        for shead in signal_headers:
            ch = shead['label']
            if ch in ch_mapping:
                ch = ch_mapping[ch]
            shead['label'] = ch

        # Invert the ECG channel if necessary
        if old_name in to_invert:
            for i, sig in enumerate(signals):
                label = signal_headers[i]['label'].lower()
                if label == cfg.ecg_channel.lower():
                    signals[i] = -sig

        # Use a temporary file to write and then move it, this avoids
        # half-written files that cannot be read later.
        # NamedTemporaryFile is used only to obtain a unique path: leaving
        # the with-block closes and deletes the placeholder again.
        # tempfile.TemporaryFile must not be used here, as its .name is a
        # file descriptor number instead of a path on POSIX systems.
        with tempfile.NamedTemporaryFile(prefix='anonymize') as tmp:
            tmp_name = tmp.name

        print('Writing tmp for {}'.format(new_file))
        sleep_utils.write_edf(tmp_name, signals, signal_headers, header,
                              digital=True, correct=True)

        # verify that contents for both files match exactly
        print('Verifying tmp for {}'.format(new_file))
        # HACK: the comparison is skipped for this one file, apparently
        # because dmin/dmax do not match after inverting
        if not old_name == 'B0036':
            sleep_utils.compare_edf(old_file, tmp_name, verbose=False)

        # now we move the tmp file to its new location.
        shutil.move(tmp_name, new_file)

    # also copy additional file information ie hypnograms and kubios files
    old_dir = ospath.dirname(old_file)
    # remove gender from weitere nt1 patients
    pattern = old_name.replace('_m', '').replace('_w', '')
    add_files = ospath.list_files(
        old_dir,
        patterns=[f'{pattern}*txt', f'{pattern}*dat', f'{pattern}*mat'])

    for add_file in add_files:
        # e.g. .mat or .npy etc etc
        new_add_file = ospath.join(
            target_folder,
            ospath.basename(add_file.replace(pattern, new_name)))
        # NOTE(review): existence is checked before the name is normalized
        # below, so .txt/.dat files are looked up under their old extension
        if ospath.exists(new_add_file):
            continue

        # hypnograms will be copied to .hypno
        new_add_file = new_add_file.replace('-Schlafprofil', '')
        new_add_file = new_add_file.replace('_sl', '')
        new_add_file = new_add_file.replace('.txt', '.hypno').replace(
            '.dat', '.hypno')
        try:
            shutil.copy(add_file, new_add_file)
        except OSError as e:
            # copying the additional files is best-effort only
            print(e)

    return old_name, new_name
def test_list_files(self):
    """Check ospath.list_files for several ext/pattern/subfolder options.

    All calls are made on the path '.'; the expected counts depend on the
    files present there when the test runs.
    """
    path = '.'

    def count(**options):
        # number of entries list_files returns for the given options
        return len(ospath.list_files(path, **options))

    # default call: absolute-looking entry containing the test file
    found = ospath.list_files(path)
    self.assertEqual(len(found), 1)
    self.assertIn('/ospath_ut.py', found[0])

    # relative=True returns the bare file name
    found = ospath.list_files(path, relative=True)
    self.assertEqual(len(found), 1)
    self.assertEqual('ospath_ut.py', found[0])

    # extension filters, without and with subfolders
    self.assertEqual(count(exts='png'), 0)
    self.assertEqual(count(exts='png', subfolders=True), 6)

    # a glob given as extension matches nothing, given as pattern it does
    self.assertEqual(count(exts='*image1*', subfolders=True), 0)
    self.assertEqual(count(patterns='*image1*', subfolders=True), 8)

    # lists of extensions
    self.assertEqual(
        count(exts=['png', '.jpg', 'txt'], subfolders=True), 10)
    self.assertEqual(
        count(exts=['.png', '.jpg', 'txt'], subfolders=True), 8)

    # extensions combined with a pattern, as string and as list
    self.assertEqual(
        count(exts=['.png', '.jpg', 'txt'], patterns='*_ut*',
              subfolders=True), 9)
    self.assertEqual(
        count(exts=['.png', '.jpg', 'txt'], patterns=['*_ut*'],
              subfolders=True), 9)

    self.assertEqual(count(patterns=['**/*txt'], subfolders=False), 2)

    # every returned entry must be an existing file
    found = ospath.list_files(path, subfolders=True)
    self.assertEqual(len(found), 15)
    for entry in found:
        self.assertTrue(ospath.isfile(entry))
        self.assertTrue(ospath.exists(entry))

    # with return_strings=False the entries are not plain strings
    found = ospath.list_files(path, subfolders=True, relative=True,
                              return_strings=False)
    self.assertEqual(len(found), 15)
    for entry in found:
        self.assertTrue(ospath.isfile(entry))
        self.assertTrue(ospath.exists(entry))
        self.assertFalse(isinstance(entry, str))