Example #1
        not_matched.append(p_name)
    return matches, not_matched

#%% main
if __name__ == '__main__':      
    max_age_diff = cfg.max_age_diff
    
    # get the mappings from names to codes
    mappings = misc.get_mapping()

    # get the list of all subjects and controls
    patients_all = read_subjects(cfg.patients_csv)
    controls_all = read_subjects(cfg.controls_csv)
    
    # ignore these items when creating the matching
    to_discard = [line[0] for line in misc.read_csv(cfg.edfs_discard) if line[2]=='1']
    controls = {c: v for c, v in controls_all.items() if c not in to_discard}
    patients = {p: v for p, v in patients_all.items() if p not in to_discard}

    # matches, not_matched = greedy_matching(patients.copy(), controls.copy(), max_age_diff=max_age_diff)
    matches, not_matched = bootstrap_matchings(patients.copy(), controls.copy(), iterations=10000000, max_age_diff=max_age_diff)
    # matches, not_matched = pymatch_matching(patients.copy(), controls.copy())
    
    check_matches_unique(matches, not_matched)
    
    # now we create the csv_string that we will write to a file:
    lines = ['#Patient Name; Patient Code; Patient Gender; Patient Age; Control Name; Control Code; Control Gender; Control Age; Difference']
    for diff, match_i in enumerate(matches):  # matches[diff] holds all pairs with age difference diff
        lines += [''] # add empty line before each new age diff section
        lines += [f'# +-{diff} age difference, {len(match_i)} matchings']
        for p_name, c_name in match_i:
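
# For reference, a hedged sketch of what the greedy_matching helper used above
# might look like. Only the name-keyed dicts and the (matches, not_matched)
# return shape come from this file; the {'age': ...} value layout is an assumption.
def greedy_matching_sketch(patients, controls, max_age_diff=3):
    matches = [[] for _ in range(max_age_diff + 1)]  # matches[d]: pairs differing by d years
    for diff in range(max_age_diff + 1):  # prefer exact age matches first
        for p_name in list(patients):
            p_age = int(patients[p_name]['age'])
            for c_name in list(controls):
                if abs(int(controls[c_name]['age']) - p_age) == diff:
                    matches[diff].append((p_name, c_name))
                    del patients[p_name], controls[c_name]
                    break
    not_matched = list(patients)  # patients left without any control
    return matches, not_matched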
Example #2
import os
import sys

import matplotlib
import matplotlib.pyplot as plt
from misc import read_csv
from misc import markers
from misc import colors
from misc import exist
from misc import err

args = sys.argv

if len(args) < 3:
    print(
        "python3 csv_spectra_plot_class.py [csv input file with spectra (nm)] [selected field for legending]"
    )  # no field given: fall through to the all-fields mode below
'''read the csv and locate the spectra'''
fields, data = read_csv(args[1])
nf = len(fields)  # number of fields
f_i = {fields[i]: i for i in range(nf)}

if len(args) < 3:  # call the program on all fields!
    for f in fields:
        if (f[-2:] != 'nm') and \
                (f not in ['ObjectID', 'GlobalID', 'x', 'y',
                           'ctr_lat', 'ctr_lon', 'image']):
            cmd = 'python3 ' + __file__ + ' ' + args[1] + ' ' + f
            print(cmd)
            a = os.system(cmd)
    sys.exit(1)

if args[2] not in fields:
    print("Error: field not found:", args[2])
    sys.exit(1)
Example #3
def to_unisens(edf_file,
               unisens_folder,
               overwrite=False,
               tqdm_desc=None,
               skip_exist=False):
    # %% create unisens
    if tqdm_desc is None:
        tqdm_desc = lambda x: None
    dtype = np.int16
    code = ospath.basename(edf_file)[:-4]
    folder = ospath.dirname(edf_file)

    unisens_folder = ospath.join(unisens_folder, code)

    if skip_exist and ospath.isdir(unisens_folder): return

    # get all additional files that belong to this EDF
    add_files = ospath.list_files(folder, patterns=code + '*')
    u = Patient(unisens_folder,
                makenew=False,
                autosave=True,
                measurementId=code)
    header = read_edf_header(edf_file)
    all_labels = header['channels']
    u.starttime = header['startdate']
    u.timestampStart = header['startdate'].strftime('%Y-%m-%dT%H:%M:%S')
    u.code = code

    attribs = misc.get_attribs()
    u.group = attribs[code].get('group', 'none')
    u.gender = attribs[code].get('gender', 'none')

    u.drug_hrv = attribs[code].get('drug_hrv', 0)
    u.drug_sleep = attribs[code].get('drug_sleep', 0)

    u.age = attribs[code].get('age', -1)
    u.match = attribs[code].get('match', '')

    u.channels = ', '.join(header['channels'])
    u.startsec = (u.starttime.hour * 60 +
                  u.starttime.minute) * 60 + u.starttime.second
    u.use_offset = 1

    # if the ECG/EEG is broken, mark it
    edfs_ecg_broken = [
        p[1] for p in misc.read_csv(cfg.edfs_discard) if p[3] == '1'
    ]
    edfs_eeg_broken = [
        p[1] for p in misc.read_csv(cfg.edfs_discard) if p[4] == '1'
    ]

    # we need to see if the eeg/emg of this file can be used
    # if one of them is broken we also remove its match from analysis
    u.ecg_broken = (code in edfs_ecg_broken) or (u.match in edfs_ecg_broken)
    u.eeg_broken = (code in edfs_eeg_broken) or (u.match in edfs_eeg_broken)

    # %% #### add ECG ##########
    ########################
    tqdm_desc(f'{code}: Reading ECG')

    if 'ECG' not in u or overwrite:
        signals, shead, header = read_edf(edf_file,
                                          ch_names=['ECG I'],
                                          digital=True,
                                          verbose=False)
        signals[:, 0:2] = np.percentile(signals, 10), np.percentile(
            signals, 90)  # trick for viewer automatic scaling
        pmin, pmax = shead[0]['physical_min'], shead[0]['physical_max']
        dmin, dmax = shead[0]['digital_min'], shead[0]['digital_max']

        lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
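        # (assumption: minmax2lsb presumably derives the scaling
        #  lsb = (pmax - pmin) / (dmax - dmin) plus the baseline offset that
        #  map the stored 16-bit digital values back to physical units)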
        attrib = {
            'data': signals.astype(dtype),
            'sampleRate': shead[0]['sample_rate'],
            'ch_names': 'ECG',
            'lsbValue': lsb,
            'baseline': offset,
            'unit': 'mV',
            'dmin': dmin,
            'dmax': dmax,
            'pmin': pmin,
            'pmax': pmax
        }

        SignalEntry(id='ECG.bin', parent=u).set_data(**attrib)

        u.sampling_frequency = shead[0]['sample_rate']
        u.duration = len(signals.squeeze()) // shead[0]['sample_rate']  # seconds
        u.epochs_signals = signals.shape[1] // int(u.sampling_frequency) // 30  # number of 30s epochs

    # %%#### add EEG ##########
    ##############################
    tqdm_desc(f'{code}: Reading EEG')
    if 'EEG' not in u or overwrite:
        chs = sleep_utils.infer_eeg_channels(all_labels)
        signals, shead, header = read_edf(edf_file,
                                          ch_names=chs,
                                          digital=True,
                                          verbose=False)
        if isinstance(signals, list):
            signals = np.atleast_2d(signals[0])
            chs = chs[0]
        # trick for viewer automatic scaling
        signals[:, 0:2] = np.percentile(signals,
                                        10), np.percentile(signals, 90)
        pmin, pmax = shead[0]['physical_min'], shead[0]['physical_max']
        dmin, dmax = shead[0]['digital_min'], shead[0]['digital_max']

        lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
        attrib = {
            'data': signals.astype(dtype),
            'sampleRate': shead[0]['sample_rate'],
            'ch_names': chs,
            'lsbValue': lsb,
            'baseline': offset,
            'contentClass': 'EEG',
            'unit': 'uV',
            'dmin': dmin,
            'dmax': dmax,
            'pmin': pmin,
            'pmax': pmax
        }
        SignalEntry(id='EEG.bin', parent=u).set_data(**attrib)

    # %%## add EOG #########
    #######################
    if 'EOG' not in u or overwrite:
        tqdm_desc(f'{code}: Reading EOG')
        chs = sleep_utils.infer_eog_channels(all_labels)
        signals, shead, header = read_edf(edf_file,
                                          ch_names=chs,
                                          digital=True,
                                          verbose=False)
        if isinstance(signals, list):
            signals = np.atleast_2d(signals[0])
            chs = chs[0]
        # trick for viewer automatic scaling
        signals[:, 0:2] = np.percentile(signals,
                                        10), np.percentile(signals, 90)
        pmin, pmax = shead[0]['physical_min'], shead[0]['physical_max']
        dmin, dmax = shead[0]['digital_min'], shead[0]['digital_max']

        lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
        attrib = {
            'data': signals.astype(dtype),
            'sampleRate': shead[0]['sample_rate'],
            'ch_names': chs,
            'lsbValue': 1,  # note: lsb/offset are computed above but not applied here,
            'baseline': 0,  # unlike for ECG; samples are stored as raw digital values
            'unit': 'uV',
            'dmin': dmin,
            'dmax': dmax,
            'pmin': pmin,
            'pmax': pmax
        }
        SignalEntry(id='EOG.bin', parent=u).set_data(**attrib)

    # %%#### add EMG #########

    if 'EMG' not in u or overwrite:
        tqdm_desc(f'{code}: Reading EMG')
        chs = sleep_utils.infer_emg_channels(all_labels)
        if chs != []:  # fix for 888_49272
            signals, shead, header = read_edf(edf_file,
                                              ch_names=chs,
                                              digital=True,
                                              verbose=False)
            if isinstance(signals, list):
                signals = np.atleast_2d(signals[0])
                chs = chs[0]
            # trick for viewer automatic scaling
            signals[:, 0:2] = np.percentile(signals,
                                            10), np.percentile(signals, 90)
            pmin, pmax = shead[0]['physical_min'], shead[0]['physical_max']
            dmin, dmax = shead[0]['digital_min'], shead[0]['digital_max']

            lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
            attrib = {
                'data': signals.astype(dtype),
                'sampleRate': shead[0]['sample_rate'],
                'ch_names': chs,
                'lsbValue': 1,
                'baseline': 0,
                'unit': 'uV',
                'dmin': dmin,
                'dmax': dmax,
                'pmin': pmin,
                'pmax': pmax
            }
            SignalEntry(id='EMG.bin', parent=u).set_data(**attrib)

    #######################################
    # %%add Thorax #########
    ######################
    if 'thorax' not in u or overwrite:
        tqdm_desc(f'{code}: Reading Thorax')
        signals, shead, header = read_edf(edf_file,
                                          ch_names=['Thorax'],
                                          digital=True,
                                          verbose=False)
        # trick for viewer automatic scaling
        signals[:, 0:2] = np.percentile(signals,
                                        10), np.percentile(signals, 90)

        pmin, pmax = shead[0]['physical_min'], shead[0]['physical_max']
        dmin, dmax = shead[0]['digital_min'], shead[0]['digital_max']

        lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
        attrib = {
            'data': signals.astype(dtype),
            'sampleRate': shead[0]['sample_rate'],
            'ch_names': 'thorax',
            'lsbValue': 1,
            'baseline': 0,
            'unit': 'uV',
            'dmin': dmin,
            'dmax': dmax,
            'pmin': pmin,
            'pmax': pmax
        }
        SignalEntry(id='thorax.bin', parent=u).set_data(**attrib)

    #######################################
    # %% add Body / Lagesensor #########
    ########################################
    if ('body' not in u or overwrite) and 'Body' in all_labels:
        tqdm_desc(f'{code}: Reading Body')
        signals, shead, header = read_edf(edf_file,
                                          ch_names=['Body'],
                                          digital=True,
                                          verbose=False)
        signals[:, 0:2] = np.percentile(signals,
                                        10), np.percentile(signals, 90)

        # some recordings contain weird body position values that we can't decode
        if np.ptp(signals) < 10:

            pmin, pmax = shead[0]['physical_min'], shead[0]['physical_max']
            dmin, dmax = shead[0]['digital_min'], shead[0]['digital_max']

            # position sensor codes: 1 = prone, 2 = upright, 3 = left, 4 = right,
            # 5 = upright (headstand), 6 = supine
            comment = 'Lagesensor: 1 = Bauchlage, 2 = aufrecht, 3 = links, 4 = rechts, ' \
                      '5 = aufrecht (Kopfstand), 6 = Rückenlage'

            lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
            attrib = {
                'data': signals.astype(dtype),
                'sampleRate': shead[0]['sample_rate'],
                'ch_names': 'body',
                'lsbValue': 1,
                'baseline': 0,
                'unit': 'uV',
                'dmin': dmin,
                'dmax': dmax,
                'pmin': pmin,
                'pmax': pmax,
                'comment': comment
            }
            SignalEntry(id='body.bin', parent=u).set_data(**attrib)

    # %% add annotations #######
    ################################
    if 'annotations' not in u or overwrite:
        annotations = header['annotations']
        if annotations != []:
            annot_entry = EventEntry('annotations.csv', parent=u)
            annotations = [[int(a[0] * 1000), a[2]] for a in annotations]  # onset seconds -> ms (entry sampleRate is 1000)
            annot_entry.set_data(annotations,
                                 sampleRate=1000,
                                 typeLength=1,
                                 contentClass='Annotation')

    # %%#### add rest #######
    ############################
    for file in add_files:
        # ignore diagnosis files of StanfordStages
        if file.endswith(('diagnosis.txt', 'hypnodensity.txt', 'hypnogram.txt')):
            pass
        # %% add arousals
        elif file.endswith('_arousal.txt'):
            if 'arousals' in u and not overwrite: continue
            lines = misc.read_csv(file, convert_nums=True)

            sdate = u.starttime
            data = []
            for t_arousal, length, _ in lines[4:]:
                # arousal timestamps are plain HH:MM:SS, so anchor them to the recording date
                t_arousal = f'{sdate.year}.{sdate.month}.{sdate.day} ' + t_arousal[:8]
                t_arousal = datetime.strptime(t_arousal, '%Y.%m.%d %H:%M:%S')
                epoch = (t_arousal - sdate).seconds // 30
                data += [[epoch, length]]

            arousal_event = EventEntry('arousals.csv', parent=u)
            arousal_event.set_data(
                data,
                comment='Arousal appearance epoch, name is lengths in seconds',
                sampleRate=1 / 30,
                contentClass='Arousal',
                typeLength=1)
        # %% add hypnogram
        elif file.endswith('txt'):
            if 'hypnogram' in u and not overwrite: continue
            tqdm_desc(f'{code}: Reading Hypnogram')
            hypno = sleep_utils.read_hypnogram(file)
            u.epochs_hypno = len(hypno)
            times = np.arange(len(hypno))
            hypno = np.vstack([times, hypno]).T
            hypno_entry = EventEntry(id='hypnogram.csv', parent=u)
            hypno_entry.set_data(
                hypno,
                comment=f'File: {code}\nSleep stages 30s epochs.',
                sampleRate=1 / 30,
                contentClass='Stage',
                typeLength=1)

        elif file.endswith('.hypno'):
            if 'hypnogram_old' in u and not overwrite: continue
            hypno = sleep_utils.read_hypnogram(file)
            if not hasattr(u, 'epochs_hypno'): u.epochs_hypno = len(hypno)
            times = np.arange(len(hypno))
            hypno = np.vstack([times, hypno]).T
            hypno_old_entry = EventEntry(id='hypnogram_old.csv', parent=u)
            hypno_old_entry.set_data(
                hypno,
                comment=f'File: {code}\nSleep stages 30s epochs.',
                sampleRate=1 / 30,
                contentClass='Stage',
                typeLength=1)
        # %% add features and kubios
        elif file.endswith('mat'):
            if 'feats.pkl' in u and not overwrite: continue
            tqdm_desc(f'{code}: Reading Kubios')
            mat = loadmat(file)
            HRV = mat['Res']['HRV']

            feats_entry = CustomEntry('feats.pkl', parent=u)
            feats_entry.set_data(
                HRV,
                comment='pickle dump of the kubios created features file',
                fileType='pickle')

            wsize = cfg.default_wsize
            step = cfg.default_step
            offset = True
            u.compute_features()
            u.get_artefacts(wsize=wsize, step=step, offset=offset)

            #%% add RRi

            tqdm_desc(f'{code}: writing RRi')

            rri_entry = CustomEntry('RRi.pkl', parent=u)
            rri_entry.set_data(
                HRV['Data']['RRi'],
                comment='raw data of RRi, the interpolated RRs at 4hz',
                fileType='pickle')
            rri_entry.sampleRate = 4

        # artefact detection was removed; artefacts are now computed from the
        # Kubios data above. Old code kept for reference:
        # elif file.endswith('npy'):
        #     if  'artefacts' in u and not overwrite: continue
        #     tqdm_desc(f'{code}: Reading artefacts')
        #     art = np.load(file).ravel()
        #     u.epochs_art = len(art)//2
        #     u.artefact_percentage = np.mean(art)
        #     times = np.arange(len(art))
        #     art = np.vstack([times, art]).T
        #     artefact_entry = ValuesEntry(id='artefacts.csv', parent=u)
        #     artefact_entry.set_data(art, sampleRate=1/15, dataType='int16')

        elif file.endswith(('.edf', 'pkl')):
            pass

        else:
            raise Exception(f'unknown file type: {file}')

    u.save()
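
# Hedged usage sketch (paths and the tqdm wiring are hypothetical):
# pbar = tqdm(total=1)
# to_unisens('/data/edfs/123_45678.edf', unisens_folder='/data/unisens',
#            tqdm_desc=pbar.set_description, skip_exist=True)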
Example #4
import os
import shutil

import ospath
import config as cfg
from misc import read_csv
from tqdm import tqdm

if __name__ == '__main__':
    documents = cfg.documents
    datasets = [
        ospath.join(documents, 'mapping_' + d + '.csv') for d in cfg.datasets
    ]
    matching = cfg.matching
    set1_path = ospath.join(cfg.folder_edf, 'set1')
    set2_path = ospath.join(cfg.folder_edf, 'set2')

    matchings = read_csv(matching)

    set1 = read_csv(datasets[0])
    set2 = read_csv(datasets[1])

    os.makedirs(set1_path, exist_ok=True)
    os.makedirs(set2_path, exist_ok=True)
    os.makedirs(ospath.join(set1_path, 'not_matched'), exist_ok=True)
    os.makedirs(ospath.join(set2_path, 'not_matched'), exist_ok=True)

    # copy the files into nt1:matched set1 and nt1:matched set2 respectively
    for p_orig, p_coded, gender, age, c_name, c_coded, c_gender, c_age, diff in tqdm(
            matchings):
        if int(diff) > cfg.max_age_diff: break  # matchings csv is sorted by ascending age difference
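        # the loop body is truncated in this excerpt; a plausible continuation,
        # assuming the coded EDFs live directly in cfg.folder_edf:
        # shutil.copy(ospath.join(cfg.folder_edf, p_coded + '.edf'),
        #             ospath.join(set1_path, p_coded + '.edf'))
        # shutil.copy(ospath.join(cfg.folder_edf, c_coded + '.edf'),
        #             ospath.join(set2_path, c_coded + '.edf'))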
Example #5
def anonymize_and_streamline(old_file, target_folder):
    """
    This function loads the edfs of a folder and
    1. removes their birthdate and patient name
    2. renames the channels to standardized channel names
    3. saves the files in another folder with a non-identifyable 
    4. verifies that the new files have the same content as the old
    """
    # load the two csvs: EDFs that we don't process, and EDFs whose ECG is upside down
    pre_coding_discard = [
        line[0] for line in misc.read_csv(cfg.edfs_discard) if line[2] == '1'
    ]
    to_invert = [line[0] for line in misc.read_csv(cfg.edfs_invert)]

    # Here we read the list of controls and patients with their age and gender
    mappings = misc.read_csv(cfg.controls_csv)
    mappings.extend(misc.read_csv(cfg.patients_csv))
    mappings = dict([[name, {
        'gender': gender,
        'age': age
    }] for name, gender, age, *_ in mappings])

    # old name is the personalized file without file extension, e.g. thomas_smith(1)
    old_name = ospath.splitext(ospath.basename(old_file))[0]
    # new name is the codified version without extension e.g '123_45678'
    new_name = codify(old_name)

    # write to a temporary file first and then move it into place;
    # this avoids leaving behind half-written files that cannot be read later
    tmp_name = tempfile.NamedTemporaryFile(prefix='anonymize', delete=False).name

    if old_name in pre_coding_discard:
        print('EDF is marked as corrupt and will be discarded')
        return

    # this is where the anonymized file will be stored
    new_file = ospath.join(target_folder, new_name + '.edf')

    if ospath.exists(new_file):
        print('New file exists already {}'.format(new_file))

    else:
        # anonymize
        print('Writing {} from {}'.format(new_file, old_name))
        assert ospath.isfile(old_file), f'{old_file} does not exist'
        signals, signal_headers, header = sleep_utils.read_edf(old_file,
                                                               digital=True,
                                                               verbose=False)
        # remove patient info
        header['birthdate'] = ''
        header['patientname'] = new_name
        header['patientcode'] = new_name
        header['gender'] = mappings[old_name]['gender']
        header['age'] = mappings[old_name]['age']

        # rename channels to a unified notation, e.g. EKG becomes ECG I
        for shead in signal_headers:
            ch = shead['label']
            if ch in ch_mapping:
                ch = ch_mapping[ch]
                shead['label'] = ch

        # Invert the ECG channel if necessary
        if old_name in to_invert:
            for i, sig in enumerate(signals):
                label = signal_headers[i]['label'].lower()
                if label == cfg.ecg_channel.lower():
                    signals[i] = -sig

        # write to a tmp file first so that no corrupted, half-written files are left behind
        print('Writing tmp for {}'.format(new_file))
        sleep_utils.write_edf(tmp_name,
                              signals,
                              signal_headers,
                              header,
                              digital=True,
                              correct=True)

        # verify that contents for both files match exactly
        print('Verifying tmp for {}'.format(new_file))
        # embarrassing hack: dmin/dmax don't match for this file after inverting,
        # so skip the verification there
        if old_name != 'B0036':
            sleep_utils.compare_edf(old_file, tmp_name, verbose=False)

        # now we move the tmp file to its new location.
        shutil.move(tmp_name, new_file)

    # also copy additional file information ie hypnograms and kubios files
    old_dir = ospath.dirname(old_file)
    pattern = old_name.replace('_m', '').replace('_w', '')  # strip gender suffix ("weitere" NT1 patients)
    add_files = ospath.list_files(
        old_dir,
        patterns=[f'{pattern}*txt', f'{pattern}*dat', f'{pattern}*mat'])
    for add_file in add_files:
        # additional files, e.g. .mat, .dat, .txt
        new_add_file = ospath.join(
            target_folder, ospath.basename(add_file.replace(pattern,
                                                            new_name)))
        if ospath.exists(new_add_file): continue
        # hypnograms will be copied to .hypno
        try:
            new_add_file = new_add_file.replace('-Schlafprofil', '')
            new_add_file = new_add_file.replace('_sl', '')
            new_add_file = new_add_file.replace('.txt', '.hypno').replace(
                '.dat', '.hypno')
            shutil.copy(add_file, new_add_file)
        except Exception as e:
            print(e)
    return old_name, new_name
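
# Hedged usage sketch (folder names are hypothetical):
# for old_file in ospath.list_files('/data/raw_edfs', patterns='*.edf'):
#     anonymize_and_streamline(old_file, target_folder='/data/anonymized')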
Example #6
'''20211128: averaging over a window; the windowed data come from
    raster_extract_spectra.py'''
import os
import sys
import csv
from misc import read_csv
from misc import exist
from misc import err
args = sys.argv

in_f = args[1]
if not exist(in_f):
    err('could not find input file: ' + in_f)

'''read the csv and locate the spectra'''
fields, data = read_csv(in_f)
fields = [x.strip().replace(',', '_') for x in fields]  # forbid comma in header
nf = len(fields)  # number of fields
f_i = {fields[i]:i for i in range(nf)}

'''insist on fields xoff and yoff'''
if (not 'xoff' in fields) or (not 'yoff' in fields):
    err("missing req'd fields: xoff, yoff")

spec_fi, nonspec_fi = [], []  # list col-idx for all spectral data columns
for i in range(nf):
    if fields[i][-2:] == 'nm':
        spec_fi += [i]
    else:  # non-spectral fields, except the offset indices that encode the analysis-window position
        if fields[i] not in ['xoff', 'yoff', 'row', 'lin']:
            nonspec_fi += [i]
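
'''hedged continuation sketch (not the original code): average all spectral
columns over rows sharing the same analysis window (xoff, yoff); assumes
read_csv returns column-major data'''
avg = {}  # (xoff, yoff) -> ([sum per spectral band], row count)
xi, yi = f_i['xoff'], f_i['yoff']
n_rows = len(data[0])
for j in range(n_rows):
    key = (data[xi][j], data[yi][j])
    sums, count = avg.setdefault(key, ([0.] * len(spec_fi), 0))
    for k, i in enumerate(spec_fi):
        sums[k] += float(data[i][j])
    avg[key] = (sums, count + 1)
for key, (sums, count) in avg.items():
    print(key, [s / count for s in sums])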
Example #7
import sys
from multiprocessing import Lock

from misc import read_csv, err

args = sys.argv
lock = Lock()
n_processed = 0

if len(args) < 5:
    err('python3 csv_spectra_distance_simple.py [csv spectra file (one spectrum)] ' +
        ' [field to select from] [field value to select]' +     
        ' [raster file]')

csv_fn, dfn = args[1], args[4]
select_field = args[2]
select_value = args[3]

'''read the csv and locate the spectra'''
fields, csv_data = read_csv(csv_fn)
nf = len(fields)  # number of fields
f_i = {fields[i]:i for i in range(nf)}

spec_fi = []
for i in range(nf):
    if fields[i][-2:] == 'nm':
        spec_fi += [i]
print('spectra col-ix', spec_fi)
print('number of cols', len(spec_fi))

select_i = f_i[select_field]  # index of the column to match on

'''average the spectra over rows where field select_field matches select_value'''
N = len(csv_data[0])  # number of data points (read_csv returns column-major data)
n_select, spec_avg = 0., [0. for i in range(len(spec_fi))]  # running count and spectral sums
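
'''hedged continuation sketch (not the original code): accumulate the spectra
of the selected rows, then normalise'''
for j in range(N):
    if str(csv_data[select_i][j]) == select_value:
        n_select += 1
        for k, i in enumerate(spec_fi):
            spec_avg[k] += float(csv_data[i][j])
if n_select == 0:
    err('no rows matched ' + select_field + ' == ' + select_value)
spec_avg = [s / n_select for s in spec_avg]
print('averaged', int(n_select), 'spectra')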
Example #8
from misc import read_csv, generate_csv
import csv

backlink_list = read_csv('backlink.csv')

domain_list = []
with open('buyers_guide.csv', newline='') as csvfile:
    linereader = csv.reader(csvfile)
    for line in linereader:
        domain_list.append(line[0])

audit_domain_list = []
audit_backlink = []
for backlink in backlink_list:
    try:
        index = domain_list.index(backlink[3])
    except ValueError:  # this backlink's domain is not in the buyers guide
        index = None
    if index is not None:
        audit_backlink.append(backlink)
        if backlink[3] not in audit_domain_list:
            audit_domain_list.append(domain_list[index])

with open('multiple_audit_backlinks_bg.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    for backlink in audit_backlink:
        writer.writerow(backlink)

csv_columns = ['URL', 'Audited']
with open('audit_domains_bg.csv', 'w', newline='') as csvfile:
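    # truncated here; presumably it continues by writing the header and one
    # row per audited domain, e.g.:
    # writer = csv.writer(csvfile, delimiter=',')
    # writer.writerow(csv_columns)
    # for domain in audit_domain_list:
    #     writer.writerow([domain, ''])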