def get_ref_df_RPA(ref_test_files, all_files, cachedir, segement=None):
    """Cut annotated RPA recordings into windows and cache their STFT features.

    Every non-blank line of ``ref_test_files`` must contain five
    space-separated fields: recording name, start time, stop time (or the
    literal ``END``, meaning "up to the last sample of the recording"), class
    label, and a fifth field that is ignored. Start/stop are multiplied by the
    sampling rate to obtain sample indices, i.e. they are in seconds.

    For each annotation the recording is windowed from the start time onwards.
    Rows 0-18 of the signal are checked with ``detect_interupted_data``,
    cleaned with ``ica_arti_remove`` and resampled to 250 Hz; the rows from 19
    onwards are scaled by 500 and resampled the same way (presumably the ECG
    channel added by ``params_RPA_addECG.txt`` -- confirm against the
    parameter file). Both parts go through ``calc_stft`` and are joined along
    axis 2; the result must have shape ``(1, 2 * segement - 1, 20, 125)``.

    Args:
        ref_test_files: Path of the annotation list described above.
        all_files: Path of a text file with one recording path per line. A
            recording is resolved by substring match of its name against
            these lines and is used only if exactly one line matches.
        cachedir: Directory receiving ``{name}_{offset}_{label}_{start}.npy``
            files. ``offset`` is the window offset in seconds *after* the
            stride has been added (so the first window is tagged 12, not 0).
        segement: Window length in seconds. ``None`` falls back to 12, the
            length this function's log message has always announced.

    Side effects:
        Paths that ``read_edf_elec`` cannot read are appended to
        ``RPA_ECG_wrong_1.txt`` in the current working directory.
    """
    if segement is None:
        # The old default crashed later on ``fsamp * None``.
        segement = 12
    print('using segement {}s '.format(segement), cachedir)

    with open(all_files, 'r') as f:
        all_filenames = f.readlines()
    print(len(all_filenames))

    # Names handed to the ICA step for rows 0-18 of the signal matrix.
    chs = [
        'Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T3', 'C3', 'Cz',
        'C4', 'T4', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'O2'
    ]
    # Stride between window starts, in seconds ("dev setting": identical for
    # every class; the training variant used 1 s for "seiz" and 6 s otherwise).
    step = 12

    # Invariant: (fsamp, data) always belong to the recording named f_prev.
    # None (rather than a string sentinel) can never equal a recording name.
    f_prev = None
    fsamp = data = None

    with open(ref_test_files, 'r') as f:
        for line in f:
            record = line.strip()
            if not record:
                # Blank / trailing empty lines carry no annotation.
                continue
            fn, st, sp, cl, _ = record.split(' ')
            st = float(st)
            stop_sec = None if sp == 'END' else float(sp)

            if fn == f_prev:
                print('same file')
            else:
                matches = [name for name in all_filenames if fn in name]
                print(matches)
                if len(matches) != 1:
                    # Without a unique match there is nothing valid to slice.
                    # Skip instead of silently windowing the previously
                    # loaded recording (or crashing on the very first line).
                    print('no unique match for', fn)
                    continue
                fn_full = matches[0].strip()
                print(fn_full)

                try:
                    new_fsamp, new_data = read_edf_elec(
                        fn_full, parameters="params_RPA_addECG.txt")
                    print(new_fsamp, new_data.shape)
                except Exception:
                    # Best effort: log the unreadable file and move on.
                    # ``Exception`` keeps Ctrl-C / SystemExit working.
                    print('can not read', fn_full)
                    with open('RPA_ECG_wrong_1.txt', 'a') as f1:
                        f1.write(fn_full + '\n')
                    continue
                # Commit only after a fully successful load so the invariant
                # above holds even when the read fails half-way.
                fsamp, data, f_prev = new_fsamp, new_data, fn

            # Last sample index (exclusive bound for the window loop).
            end = data.shape[1] if stop_sec is None else stop_sec * fsamp
            window_len = int(fsamp * segement)

            i = 0
            # Strict "<": a window ending exactly on ``end`` is not produced.
            while (st + i) * fsamp + window_len < end:
                start = int((st + i) * fsamp)
                stop = start + window_len
                s = data[:19, start:stop]
                s_extra = data[19:, start:stop] * 500

                # Detect interrupted signal, e.g. all DC or overflow.
                if detect_interupted_data(s.transpose(), fsamp):
                    print(
                        'BAD DATA DETECTED! Skipping this {}-second segment due to interupted signals...'
                        .format(segement))
                    i += step
                    continue
                print('GOOD DATA!')

                ica_filt_s = ica_arti_remove(s, fsamp, chs)
                if ica_filt_s is None:
                    print(
                        'Skipping this {}-second segment due to failed ICA...'.
                        format(segement))
                    i += step
                    continue

                # Bring both parts to 250 Hz regardless of the native rate.
                ica_filt_s = resample(ica_filt_s,
                                      int(ica_filt_s.shape[1] * 250.0 / fsamp),
                                      axis=1)
                s_extra = resample(s_extra,
                                   int(s_extra.shape[1] * 250.0 / fsamp),
                                   axis=1)

                # Advance before building the file name: existing caches are
                # keyed by the post-increment offset, so keep that convention.
                i += step

                assert s.shape[1] == window_len

                prep_s = calc_stft(ica_filt_s)
                extra_stft = calc_stft(s_extra)
                # Join along axis 2; the outer concatenate merges the leading
                # axis away and expand_dims puts a single leading axis back.
                prep_s = np.concatenate(
                    np.concatenate([prep_s, extra_stft], axis=2))
                prep_s = np.expand_dims(prep_s, axis=0)
                print('stft shape', prep_s.shape)
                prep_fn = '{}/{}_{}_{}_{}.npy'.format(cachedir, fn, i, cl, st)
                print('save to {}'.format(prep_fn))

                assert prep_s.shape == (1, 2 * segement - 1, 20, 125)
                np.save(prep_fn, prep_s)
def get_ref_train_df_TUH(ref_train_file, all_files, cachedir, segement=None):
    """Cut annotated TUH recordings into windows and cache their STFT features.

    Every non-blank line of ``ref_train_file`` must contain five
    space-separated fields: recording name, start time, stop time, class
    label, and a fifth field that is ignored. Times are multiplied by 250 to
    obtain sample indices, i.e. they are seconds at 250 Hz.

    Each window is checked with ``detect_interupted_data``, cleaned with
    ``ica_arti_remove`` and transformed with ``calc_stft``; the result must
    have shape ``(1, 2 * segement - 1, 19, 125)`` and is saved with
    ``np.save``.

    Args:
        ref_train_file: Path of the annotation list described above.
        all_files: Path of a text file with one recording path per line. A
            recording is resolved by substring match of its name against
            these lines; annotations without exactly one match are skipped.
        cachedir: Directory receiving ``{name}_{offset}_{label}_{start}.npy``
            files. ``offset`` is the window offset in seconds *after* the
            stride has been added (so the first window is tagged 12, not 0).
        segement: Window length in seconds. ``None`` falls back to 12, the
            length this function's log message has always announced.

    Side effects:
        Paths that ``read_edf_elec`` cannot read are appended to
        ``dev_ica_EEG_wrong.txt`` in the current working directory.
    """
    if segement is None:
        # The old default raised TypeError on ``250 * None``.
        segement = 12
    print('using segement {}s '.format(segement))
    # int() keeps the value usable as a slice bound for non-integer lengths.
    window_len = int(250 * segement)

    with open(all_files, 'r') as f:
        all_filenames = f.readlines()
    print(len(all_filenames))

    # Channel names handed to the ICA step.
    chs = [
        'Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T3', 'C3',
        'Cz', 'C4', 'T4', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'O2'
    ]
    # Stride between window starts, in seconds ("dev setting": identical for
    # every class; the training variant used 1 s for "seiz" and 6 s otherwise).
    step = 12

    with open(ref_train_file, 'r') as f:
        for line in f:
            record = line.strip()
            if not record:
                # Blank / trailing empty lines carry no annotation.
                continue
            fn, st, sp, cl, _ = record.split(' ')
            st, sp = float(st), float(sp)

            matches = [name for name in all_filenames if fn in name]
            if len(matches) != 1:
                continue
            fn_full = matches[0].strip()
            print(fn_full)

            try:
                fsamp, data = read_edf_elec(
                    fn_full, parameters="params_TUH_ECG.txt")
            except Exception:
                # Best effort: log the unreadable file and move on.
                # ``Exception`` keeps Ctrl-C / SystemExit working.
                print('can not read', fn_full)
                with open('dev_ica_EEG_wrong.txt', 'a') as f1:
                    f1.write(fn_full + '\n')
                continue

            print(fsamp, data.shape)

            # Resample to 250 Hz if the native rate is higher.
            # NOTE(review): recordings below 250 Hz are left untouched but are
            # still indexed as if they were 250 Hz -- confirm none exist.
            if fsamp > 250:
                print('Resampling data from {} to 250 Hz'.format(fsamp))
                data = resample(data,
                                int(data.shape[1] * 250.0 / fsamp),
                                axis=1)

            i = 0
            # Strict "<": a window ending exactly at ``sp`` is not produced.
            while (st + i) * 250 + window_len < sp * 250:
                start = int((st + i) * 250)
                s = data[:, start:start + window_len]

                # Detect interrupted signal, e.g. all DC or overflow.
                if detect_interupted_data(s.transpose(), 250):
                    print(
                        'BAD DATA DETECTED! Skipping this {}-second segment due to interupted signals...'
                        .format(segement))
                    i += step
                    continue
                print('GOOD DATA!')

                ica_filt_s = ica_arti_remove(s, 250, chs)
                if ica_filt_s is None:
                    print(
                        'Skipping this {}-second segment due to failed ICA...'
                        .format(segement))
                    i += step
                    continue

                # Advance before building the file name: existing caches are
                # keyed by the post-increment offset, so keep that convention.
                i += step

                assert s.shape[1] == window_len

                prep_s = calc_stft(ica_filt_s)
                print('stft shape', prep_s.shape)
                prep_fn = '{}/{}_{}_{}_{}.npy'.format(
                    cachedir, fn, i, cl, st)

                assert prep_s.shape == (1, 2 * segement - 1, 19, 125)
                np.save(prep_fn, prep_s)
# Example no. 3  (listing separator, not code; original text: "Exemplo n.º 3")
# 0
def get_ref_train_df(ref_train_file, all_files, cachedir):
    """Cache 5-second, two-channel STFT windows for every annotated event.

    Each line of ``ref_train_file`` holds five space-separated fields:
    recording name, start time, stop time, class label, and an ignored fifth
    field. Times are multiplied by 250 to get sample indices. The recording
    is looked up by substring match among the lines of ``all_files`` and the
    annotation is skipped unless exactly one line matches.

    Windows are reduced to two difference signals (row 3 minus row 2, row 13
    minus row 17), passed through ``calc_stft`` and written to
    ``{cachedir}/{name}_{offset}_{label}_{start}.npy``. The stride is 1 s for
    label ``"seiz"`` and 3 s otherwise, and ``offset`` in the file name is the
    value after the stride has been added. For ``"seiz"`` events, two extra
    windows starting 3 s and 4 s before the event start are saved as well
    (tagged -3 and -4) when they start at or after 0 and are full length.
    """
    win = 250 * 5  # samples in one 5-second window at 250 Hz

    def two_channel_montage(block):
        # Stack the two difference signals as a (2, n_samples) array.
        upper = block[3:4, :] - block[2:3, :]
        lower = block[13:14, :] - block[17:18, :]
        return np.concatenate((upper, lower), axis=0)

    with open(all_files, 'r') as listing:
        all_filenames = listing.readlines()
    print(len(all_filenames))

    with open(ref_train_file, 'r') as annotations:
        for raw_line in annotations:
            fn, st, sp, cl, _ = raw_line.strip().split(' ')
            st, sp = float(st), float(sp)

            candidates = [entry for entry in all_filenames if fn in entry]
            if len(candidates) != 1:
                continue

            edf_path = candidates[0].strip()
            print(edf_path)
            fsamp, data = read_edf_elec(edf_path)
            print(fsamp, data.shape)

            # Bring recordings sampled above 250 Hz down to 250 Hz.
            if fsamp > 250:
                print('Resampling data from {} to 250 Hz'.format(fsamp))
                n_samples = int(data.shape[1] * 250.0 / fsamp)
                data = resample(data, n_samples, axis=1)

            is_seizure = cl == "seiz"
            stride = 1 if is_seizure else 3

            offset = 0
            while (st + offset) * 250 + win < sp * 250:
                first = int((st + offset) * 250)
                s = two_channel_montage(data[:, first:first + win])
                # The offset is advanced before it is used in the file name.
                offset += stride
                print('Raw time-series shape', s.shape)

                assert s.shape[1] == win

                prep_s = calc_stft(s)
                prep_fn = '{}/{}_{}_{}_{}.npy'.format(
                    cachedir, fn, offset, cl, st)

                print('Preprocessed shape', prep_s.shape)
                assert prep_s.shape == (1, 9, 2, 125)
                np.save(prep_fn, prep_s)

            if not is_seizure:
                continue

            # Extra windows that begin shortly before the seizure onset.
            for i_a in range(2):
                lead = st - i_a - 3
                if lead >= 0:
                    first = int(lead * 250)
                    last = first + win
                    s = two_channel_montage(data[:, first:last])
                    print('Additional raw time-series shape', lead, s.shape,
                          first, last)
                    # Windows truncated by the end of the data are dropped.
                    if s.shape[1] == win:
                        prep_s = calc_stft(s)
                        prep_fn = '{}/{}_{}_{}_{}.npy'.format(
                            cachedir, fn, -i_a - 3, cl, st)
                        assert prep_s.shape == (1, 9, 2, 125)

                        print('Additional preprocessed shape', prep_s.shape)
                        np.save(prep_fn, prep_s)