Exemplo n.º 1
0
def test_ecg_clean():
    """Check `nk.ecg_clean` on a simulated ECG.

    Verifies that cleaning keeps the sample count, that the "neurokit"
    method reduces spectral magnitude below 0.5 Hz, and that the "biosppy"
    method agrees (on average) with biosppy's own FIR band-pass filter.
    """
    fs = 1000

    raw = nk.ecg_simulate(sampling_rate=fs, noise=0.05)
    cleaned = nk.ecg_clean(raw, sampling_rate=fs, method="neurokit")

    # Cleaning must not change the number of samples.
    assert raw.size == cleaned.size

    # Assert that highpass filter with .5 Hz lowcut was applied: the summed
    # magnitude of all bins below 0.5 Hz must shrink after cleaning.
    below_cutoff = np.fft.rfftfreq(raw.size, 1 / fs) < .5
    raw_magnitude = np.abs(np.fft.rfft(raw))
    cleaned_magnitude = np.abs(np.fft.rfft(cleaned))

    assert (np.sum(raw_magnitude[below_cutoff]) >
            np.sum(cleaned_magnitude[below_cutoff]))

    # Comparison to biosppy (https://github.com/PIA-Group/BioSPPy/blob/e65da30f6379852ecb98f8e2e0c9b4b5175416c3/biosppy/signals/ecg.py#L69)
    via_neurokit = nk.ecg_clean(raw, sampling_rate=fs, method="biosppy")
    reference, _, _ = biosppy.tools.filter_signal(
        signal=raw,
        ftype='FIR',
        band='bandpass',
        order=int(0.3 * fs),
        frequency=[3, 45],
        sampling_rate=fs,
    )
    mean_difference = (via_neurokit - reference).mean()
    assert np.allclose(mean_difference, 0, atol=1e-6)
Exemplo n.º 2
0
def extract_rpeak_features(row, signal):
    """
    Extract the R peak features.

    The signal is cleaned with `nk.ecg_clean`, R peaks are located with
    `nk.ecg_peaks`, and the following entries are added to `row`: the HRV
    indices from `nk.hrv` (only when more than two peaks were found),
    ``N_QRS`` (number of R peaks), and the outputs of `get_statistics` for
    the RR intervals (in seconds) and for the raw signal.

    :param row: a `BaseDataset` row to calculate the features from; its
        ``Fs`` attribute is used as the sampling rate
    :param signal: the raw ECG signal
    :return: tuple ``(row, info)`` - `row` with the added features and the
        info dict returned by `nk.ecg_peaks`
    """
    # Local import keeps the block self-contained. `Series.append` was removed
    # in pandas 2.0; `pd.concat` is its documented replacement.
    import pandas as pd

    ecg_cleaned = nk.ecg_clean(signal, sampling_rate=row.Fs)

    peaks, info = nk.ecg_peaks(ecg_cleaned, sampling_rate=row.Fs)
    r_peaks_sec = np.where(peaks['ECG_R_Peaks'].to_numpy() == 1)[0].astype(
        np.float32)
    r_peaks_sec /= row.Fs  # sample indices -> R-peak times in seconds

    num_peaks = len(r_peaks_sec)
    if num_peaks > 2:
        hrv = nk.hrv(peaks, sampling_rate=row.Fs, show=False).iloc[0]
        row = pd.concat([row, hrv])
    # NOTE(review): when num_peaks <= 2 `row` is still the caller's object,
    # so this assignment writes into it in place - confirm that is intended.
    row['N_QRS'] = num_peaks

    rr = np.diff(r_peaks_sec)
    row = pd.concat([
        row,
        get_statistics(rr, 'RR'),
        get_statistics(signal, 'signal'),
    ])

    return row, info
def get_HRVs_values(data, header_data):
    """Compute time-domain HRV features for one recording.

    The second lead (``data[1]``) is scaled by its header gain, cleaned with
    the "biosppy" method, and passed to `nk.ecg_peaks` / `nk.hrv_time`. The
    mean raw-signal value at the detected P peaks, the patient age and the
    first diagnosis label are added as extra columns.

    :param data: per-lead signals; only index 1 is used
    :param header_data: header lines. Line 0 holds the number of leads and
        the sampling frequency as its 2nd and 3rd space-separated fields;
        the following ``num_leads`` lines hold the gain as the part before
        ``/`` in their 3rd field. ``#Age`` and ``#Dx`` lines must be present,
        otherwise `age` / `label` are unbound when they are stored.
    :return: the `nk.hrv_time` result with the added columns
        ``mean_P_Peaks``, ``age`` and ``label``
    """
    record_fields = header_data[0].split(' ')
    num_leads = int(record_fields[1])
    sample_Fs = int(record_fields[2])

    gain_lead = np.zeros(num_leads)
    for ii in range(num_leads):
        lead_fields = header_data[ii + 1].split(' ')
        gain_lead[ii] = int(lead_fields[2].split('/')[0])

    # for testing, we included the mean age of 57 if the age is a NaN
    # This value will change as more data is being released
    for iline in header_data:
        if iline.startswith('#Age'):
            tmp_age = iline.split(': ')[1].strip()
            age = int(tmp_age if tmp_age != 'NaN' else 57)
        elif iline.startswith('#Dx'):
            # Only the first of the comma-separated diagnoses is kept.
            label = iline.split(': ')[1].split(',')[0]

    signal = data[1]
    gain = gain_lead[1]

    # NOTE(review): the signal is multiplied by the header gain; if the gain
    # is expressed in ADC units per mV a division may have been intended -
    # confirm against the data format.
    ecg_signal = nk.ecg_clean(signal * gain,
                              sampling_rate=sample_Fs,
                              method="biosppy")
    _, rpeaks = nk.ecg_peaks(ecg_signal, sampling_rate=sample_Fs)
    hrv_time = nk.hrv_time(rpeaks, sampling_rate=sample_Fs)

    try:
        _, waves_peak = nk.ecg_delineate(ecg_signal,
                                         rpeaks,
                                         sampling_rate=sample_Fs)
        p_peaks = waves_peak['ECG_P_Peaks']
    except ValueError:
        print('Exception raised!')
        # Without this, `p_peaks` would be unbound below and the function
        # would die with a NameError instead of reporting "no P peaks".
        p_peaks = []

    # Drop undetected (NaN) P peaks and turn the rest into integer indices.
    p_peaks = np.asarray(p_peaks, dtype=float)
    p_peaks = p_peaks[~np.isnan(p_peaks)]
    p_indices = [int(a) for a in p_peaks]

    # The mean is taken on the raw (unscaled, uncleaned) signal.
    if p_indices:
        mean_P_Peaks = np.mean([signal[w] for w in p_indices])
    else:
        mean_P_Peaks = np.nan

    hrv_time['mean_P_Peaks'] = mean_P_Peaks
    hrv_time['age'] = age
    hrv_time['label'] = label

    return hrv_time
Exemplo n.º 4
0
def test_ecg_rate():
    """Check shape and mean of `nk.ecg_rate`, with and without `desired_length`."""
    fs = 1000

    simulated = nk.ecg_simulate(duration=120,
                                sampling_rate=fs,
                                noise=0.15,
                                random_state=42)
    cleaned = nk.ecg_clean(simulated, sampling_rate=fs, method="neurokit")
    _, peak_info = nk.ecg_peaks(cleaned, method="neurokit")

    # Without a desired length there is one rate value per detected R peak.
    per_peak_rate = nk.ecg_rate(rpeaks=peak_info, sampling_rate=fs)

    assert per_peak_rate.shape == (peak_info["ECG_R_Peaks"].size, )
    assert np.allclose(per_peak_rate.mean(), 70, atol=2)

    # With a desired length the rate has exactly that many values.
    n_samples = 1200
    resampled_rate = nk.ecg_rate(rpeaks=peak_info,
                                 sampling_rate=fs,
                                 desired_length=n_samples)

    assert resampled_rate.shape == (n_samples, )
    assert np.allclose(resampled_rate.mean(), 70, atol=2)
Exemplo n.º 5
0
def test_ecg_peaks():
    """Check `nk.ecg_peaks` output shape and peak count, with and without artifact correction."""
    fs = 1000

    simulated = nk.ecg_simulate(duration=120,
                                sampling_rate=fs,
                                noise=0.15,
                                random_state=42)
    cleaned = nk.ecg_clean(simulated, sampling_rate=fs, method="neurokit")

    # First without, then with the request to correct artifacts; both runs
    # must satisfy the same expectations.
    for correct_artifacts in (False, True):
        peak_frame, _ = nk.ecg_peaks(cleaned,
                                     correct_artifacts=correct_artifacts,
                                     method="neurokit")

        assert peak_frame.shape == (120000, 1)

        n_peaks = peak_frame["ECG_R_Peaks"].values.sum(dtype=np.int64)
        assert np.allclose(n_peaks, 139, atol=1)
    def qrs_detection_pantompkins_vs_neurokit(self, dataset):
        """Save one R-peak plot per detection method for a fixed record.

        Takes the row at position 50 of ``dataset.data``, reads the first
        ``Fs * 20 + 1`` samples of its record, and for each of the
        'pantompkins' and 'neurokit' methods cleans the signal, detects the
        R peaks and saves a figure named ``qrs_<method>.png``.

        :param dataset: object exposing ``data`` (indexable with ``iloc``)
            and ``read_record``
        """
        record = dataset.data.iloc[50]
        fs = record.Fs
        ecg = dataset.read_record(record.Record)[:fs * 20 + 1]
        time_axis = np.arange(len(ecg)) / fs

        # Method identifier -> name shown in the figure title.
        display_names = {'pantompkins': 'Pan–Tompkins', 'neurokit': 'Neurokit'}

        for method, display_name in display_names.items():
            cleaned = nk.ecg_clean(ecg, sampling_rate=fs, method=method)
            peak_frame, _ = nk.ecg_peaks(cleaned,
                                         sampling_rate=fs,
                                         method=method)

            peak_idx = np.where(peak_frame['ECG_R_Peaks'].to_numpy() == 1)[0]

            # Peaks are drawn on the raw signal, not on the cleaned one.
            signal_trace = go.Scatter(x=time_axis, y=ecg)
            peak_trace = go.Scatter(mode='markers',
                                    x=peak_idx / fs,
                                    y=ecg[peak_idx])

            fig = go.Figure()
            fig.add_trace(signal_trace)
            fig.add_trace(peak_trace)
            fig.update_traces(marker=dict(size=8))

            self.set_ecg_layout(
                fig,
                title=f'{record.Record} - R peaks ({display_name} method)',
                showlegend=False,
                xaxis=dict(range=[0, 20]),
                yaxis=dict(range=[-5000, 5000]))
            self.save_image(fig, f'qrs_{method}.png', width=900, height=300)
Exemplo n.º 7
0
def process_X_values(X, Y):
    """Clean every lead of every recording and flatten each into one wide row.

    Side effect: ``Y["diagnostic_superclass"]`` is overwritten in place with
    integer codes (NORM=0, MI=1, STTC=2, HYP=3, anything else=4).

    :param X: indexable collection of recordings; each ``X[v]`` must be
        accepted by `pd.DataFrame` with 12 columns
    :param Y: DataFrame with ``diagnostic_superclass``, ``patient_id`` and
        ``strat_fold`` columns, aligned by position with `X`
    :return: the per-recording rows concatenated, with rows whose ``id`` is
        NaN removed
    """
    superclass_names = ("NORM", "MI", "STTC", "HYP")

    def encode_superclass(name):
        # Position in `superclass_names` is the code; unknown names map to 4.
        for code, known in enumerate(superclass_names):
            if name == known:
                return code
        return 4

    Y["diagnostic_superclass"] = Y["diagnostic_superclass"].swifter.apply(
        encode_superclass)

    # NOTE(review): 'aVL' is listed before 'aVR'; some datasets store these
    # two leads in the opposite order - verify against the source of X.
    lead_names = [
        'I', 'II', 'III', 'aVL', 'aVR', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5',
        'V6'
    ]

    dfs = []
    for v in tqdm(range(0, len(X))):
        meta = Y.iloc[v]
        temp = pd.DataFrame(X[v], columns=lead_names)

        for value in temp.columns:
            ecg = np.array(temp[value])
            try:
                signals = nk.ecg_clean(ecg,
                                       sampling_rate=250,
                                       method='pantompkins1985')
            except Exception:
                # Retry with a lower sampling rate. Catching `Exception`
                # instead of a bare `except` lets Ctrl-C / SystemExit through.
                signals = nk.ecg_clean(ecg,
                                       sampling_rate=100,
                                       method='pantompkins1985')

            temp[value] = signals

        # Pivot the (samples x leads) frame into a single row indexed by the
        # patient id, with one column per (lead, sample number) pair.
        temp['id'] = meta.patient_id
        s = temp.groupby('id').cumcount().add(1)
        temp = (temp.set_index(['id', s]).unstack().sort_index(axis=1,
                                                               level=1))
        temp['diagnostic_superclass'] = meta.diagnostic_superclass
        temp['strat_fold'] = meta.strat_fold
        temp['id'] = meta.patient_id
        dfs.append(temp)

    data = pd.concat(dfs)
    data = data[~np.isnan(data.id)]
    return data
Exemplo n.º 8
0
 def apply(sampling_rate):
     """Clean the global ECG trace and redraw it.

     Reads the module-level `x_global` / `y_global`, destroys
     `window_autofilter`, replaces `y_global` with the output of
     `nk.ecg_clean`, and replots the trace on `a` before redrawing `canvas`.

     :param sampling_rate: sampling rate forwarded to `nk.ecg_clean`
     """
     global x_global, y_global
     # Snapshot both globals before the window is destroyed.
     time_axis, voltage = x_global, y_global
     window_autofilter.destroy()

     cleaned = nk.ecg_clean(voltage, sampling_rate = sampling_rate)
     x_global, y_global = time_axis, cleaned

     a.clear()
     a.plot(time_axis, cleaned)
     # NOTE(review): labels go through pyplot's current axes, which is
     # presumably `a` - confirm.
     plt.xlabel('time [s]')
     plt.ylabel('voltage [mV]')
     canvas.draw()
Exemplo n.º 9
0
def test_ecg_findpeaks():
    """Check how many R peaks each `nk.ecg_findpeaks` method detects on simulated ECG."""
    fs = 1000

    def count_r_peaks(signal, **options):
        # Number of R peaks reported by `nk.ecg_findpeaks` for `signal`.
        return nk.ecg_findpeaks(signal, **options)["ECG_R_Peaks"].size

    simple_ecg = nk.ecg_simulate(duration=60, sampling_rate=fs, noise=0, method="simple", random_state=42)
    simple_cleaned = nk.ecg_clean(simple_ecg, sampling_rate=fs, method="neurokit")

    # Test neurokit method with show=True
    assert count_r_peaks(simple_cleaned, show=True) == 69
    # This will identify the latest figure.
    latest_figure = plt.gcf()
    assert len(latest_figure.axes) == 2

    # (method, clean with the method's own pipeline?, expected count,
    #  tolerance or None for an exact match)
    cases = (
        ("pantompkins1985", True, 70, None),
        ("hamilton2002", True, 69, None),
        ("christov2004", False, 273, None),
        ("gamboa2008", False, 69, None),
        ("elgendi2010", True, 70, None),
        ("engzeemod2012", True, 70, None),
        ("kalidas2017", True, 68, 1),
    )
    for method, own_cleaning, expected, tolerance in cases:
        if own_cleaning:
            signal = nk.ecg_clean(simple_ecg, method=method)
        else:
            signal = simple_cleaned
        n_peaks = count_r_peaks(signal, method=method)
        if tolerance is None:
            assert n_peaks == expected
        else:
            assert np.allclose(n_peaks, expected, atol=tolerance)

    # Test martinez2003 method (on an ECG simulated with the default method)
    default_ecg = nk.ecg_simulate(duration=60, sampling_rate=fs, noise=0, random_state=42)
    default_cleaned = nk.ecg_clean(default_ecg, sampling_rate=fs, method="neurokit")
    n_martinez = count_r_peaks(default_cleaned, method="martinez2003")
    assert np.allclose(n_martinez, 69, atol=1)
def generate_features(ecg, header):
    """Compute P-wave feature summaries for every lead of an ECG.

    Lead names are taken from the header lines containing ``'.mat'`` (third
    chunk after splitting on ``' 0 '``) and paired positionally with the
    signals in `ecg`. A fixed sampling rate of 500 is used.

    :param ecg: iterable of per-lead signals
    :param header: iterable of header lines
    :return: dict mapping lead name to its feature dict, or ``None`` as soon
        as any lead is all-zero after cleaning, has no R peaks, or raises an
        `IndexError` during delineation / feature computation
    """
    fs = 500

    lead_names = [
        line.split(' 0 ')[2].strip() for line in header if '.mat' in line
    ]

    features = {}
    for lead_signal, lead in zip(ecg, lead_names):
        cleaned = nk.ecg_clean(lead_signal, sampling_rate=fs)
        if np.all(cleaned == 0):
            return None

        _, rpeaks = nk.ecg_peaks(cleaned, sampling_rate=fs)
        if rpeaks['ECG_R_Peaks'].size == 0:
            return None

        try:
            _, waves_dwt = nk.ecg_delineate(cleaned,
                                            rpeaks['ECG_R_Peaks'],
                                            sampling_rate=fs,
                                            method="dwt")
            (biphase, areas, t_till_peaks, ampls, dur, idxs,
             pq_intervals) = p_peak_features(cleaned, waves_dwt)
            lead_features = {
                'PQ_int': calculate_features(pq_intervals),
                'P_dur': calculate_features(dur),
                'Area/Dur_P': calculate_features(idxs),
                'Area_under_P': calculate_features(areas),
                'P_amp': calculate_features(ampls),
                'Time_till_P': calculate_features(t_till_peaks),
                'Biphase_P': calculate_features(biphase),
            }
        except IndexError:
            return None

        features[lead] = lead_features

    return features
Exemplo n.º 11
0
def my_processing(ecg_signal):
    """Clean an ECG, detect its R peaks and try to delineate its waves.

    A fixed sampling rate of 300 is used throughout. Cleaning uses the
    "biosppy" method, R-peak detection uses 'hamilton2002' and delineation
    uses 'cwt'.

    :param ecg_signal: the raw ECG signal
    :return: tuple ``(ecg_cleaned, delineate_signal, delineate_waves, info)``
        where `info` is the second value returned by `nk.ecg_peaks`; both
        delineation entries are NaN when delineation fails
    """
    sampling_rate = 300

    ecg_cleaned = nk.ecg_clean(ecg_signal,
                               sampling_rate=sampling_rate,
                               method="biosppy")
    _, rpeaks = nk.ecg_peaks(ecg_cleaned,
                             sampling_rate=sampling_rate,
                             method='hamilton2002')
    try:
        # Additional info of the ecg signal
        delineate_signal, delineate_waves = nk.ecg_delineate(
            ecg_cleaned=ecg_cleaned,
            rpeaks=rpeaks,
            sampling_rate=sampling_rate,
            method='cwt')
    except Exception:
        # Best effort: any delineation failure yields NaN placeholders.
        # `np.nan` is used because the `np.NaN` alias was removed in
        # NumPy 2.0, which would make this fallback itself raise.
        delineate_signal = np.nan
        delineate_waves = np.nan
    return ecg_cleaned, delineate_signal, delineate_waves, rpeaks
Exemplo n.º 12
0
def find_R_peaks(ecg_data, samplefreq):
    """Return the R-peak sample indices of an ECG, cleaning it first if needed.

    Detection is first attempted on the signal as given. If that raises, the
    signal is cleaned with the "neurokit" method and detection is retried.
    If the retry raises as well, a placeholder array ``[0, 1, 2, 3]`` is
    returned instead of real peaks.

    :param ecg_data: the ECG signal
    :param samplefreq: sampling rate forwarded to neurokit
    :return: integer array of R-peak indices with NaN entries removed, or
        the placeholder array described above
    """

    def detect(signal):
        # Run the detector and drop NaN entries before casting to int.
        _, rpeaks = nk.ecg_peaks(signal, sampling_rate=samplefreq)
        r_peaks = rpeaks['ECG_R_Peaks']
        return np.delete(r_peaks, np.where(np.isnan(r_peaks))[0]).astype(int)

    # `except Exception` (not a bare `except`) keeps the best-effort fallback
    # while letting KeyboardInterrupt / SystemExit propagate.
    try:
        return detect(ecg_data)
    except Exception:
        print("cleaning data")

    cleaned_ecg = nk.ecg_clean(ecg_data,
                               sampling_rate=samplefreq,
                               method="neurokit")
    try:
        return detect(cleaned_ecg)
    except Exception:
        print("could not analyse cleaned ECG")
        # Temporary workaround: hand back dummy peak positions.
        return np.array([0, 1, 2, 3])
    def extract_features_tmaps(
        self,
        signal_tm: TensorMap,
        clean_method: str = "neurokit",
        r_method: str = "neurokit",
        wave_method: str = "dwt",
        min_peaks: int = 200,
    ):
        """
        Function to extract the ecg features using the neurokit2 package. That
        is the P, Q, R, S and T peaks and the P, QRS and T waves onsets and
        offsets. The result is saved internally in ``self.r_peaks`` and
        ``self.waves_peaks`` (values are converted to lists at the end).

        The signal is processed one sampling-rate segment at a time; each
        entry of ``self.sampling_rate`` provides the rate (index 0) and the
        segment start (index 1). Segments where R-peak detection raises
        ``IndexError`` or finds fewer than `min_peaks` peaks are skipped.

        :param signal_tm: <TensorMap> tensor map used to read the signal.
        :param clean_method: <str> The processing pipeline to apply. Can be one of
                             'neurokit' (default), 'biosppy', 'pantompkins1985',
                             'hamilton2002', 'elgendi2010', 'engzeemod2012'.
        :param r_method: <str> The algorithm to be used for R-peak detection. Can be one
                         of 'neurokit' (default), 'pantompkins1985', 'hamilton2002',
                         'christov2004', 'gamboa2008', 'elgendi2010', 'engzeemod2012'
                         or 'kalidas2017'.
        :param wave_method: <str> Can be one of 'dwt' (default) for discrete
                            wavelet transform or 'cwt' for continuous wavelet transform.
        :param min_peaks: <int> Minimum R peaks to be detected to proceed with
                          further calculations.
        """

        def merge_peaks(store, found):
            # Append every entry of `found` to the matching entry of `store`.
            for peak_type, values in found.items():
                if peak_type not in store:
                    store[peak_type] = values
                else:
                    store[peak_type] = np.append(store[peak_type], values)

        last_segment = len(self.sampling_rate) - 1
        for i, _ in enumerate(self.sampling_rate):
            sampling_rate = self.sampling_rate[i][0]
            init = self.sampling_rate[i][1]
            # The last segment runs to the end of the signal. `None` is used
            # rather than -1, which would silently drop the final sample.
            if i == last_segment:
                end = None
            else:
                end = self.sampling_rate[i + 1][1]
            ecg_signal = signal_tm.tensor_from_file(signal_tm,
                                                    self)[0][init:end]
            ecg_signal = nk.ecg_clean(ecg_signal, sampling_rate, clean_method)

            try:
                _, r_peaks = nk.ecg_peaks(ecg_signal, sampling_rate, r_method)
            except IndexError:
                continue
            if len(r_peaks["ECG_R_Peaks"]) < min_peaks:
                continue

            # Delineate twice - once with neurokit's default method and once
            # with `wave_method` - and merge both results (the second wins
            # on shared keys).
            _, waves_peaks = nk.ecg_delineate(ecg_signal, r_peaks,
                                              sampling_rate)
            _, waves_peaks_2 = nk.ecg_delineate(
                ecg_signal,
                r_peaks,
                sampling_rate,
                wave_method,
            )
            waves_peaks.update(waves_peaks_2)

            # NOTE(review): the signal was sliced at `init`, so the indices
            # are presumably relative to the segment start; they are stored
            # without adding `init` - confirm that is what consumers expect.
            merge_peaks(self.r_peaks, r_peaks)
            merge_peaks(self.waves_peaks, waves_peaks)

        for peak_type in self.r_peaks:
            self.r_peaks[peak_type] = list(self.r_peaks[peak_type])
        for peak_type in self.waves_peaks:
            self.waves_peaks[peak_type] = list(self.waves_peaks[peak_type])
    def extract_features(
        self,
        clean_method: str = "neurokit",
        r_method: str = "neurokit",
        wave_method: str = "dwt",
        min_peaks: int = 200,
        size: int = 200000,
    ):
        """
        Extract the ECG features of the configured lead with neurokit2: the
        P, Q, R, S and T peaks and the P, QRS and T wave onsets and offsets.
        Results accumulate in ``self.r_peaks`` and ``self.waves_peaks``, whose
        values are converted to lists at the end. Nothing is done when
        ``self.lead`` is falsy.

        Each sampling-rate segment described by ``self.sampling_rate`` (rate
        at index 0, start at index 1) is processed in windows of at most
        `size` samples. Windows where R-peak detection raises ``IndexError``
        or yields fewer than `min_peaks` peaks are skipped.

        :param clean_method: <str> The processing pipeline to apply. Can be one of
                             'neurokit' (default), 'biosppy', 'pantompkins1985',
                             'hamilton2002', 'elgendi2010', 'engzeemod2012'.
        :param r_method: <str> The algorithm to be used for R-peak detection. Can be one
                         of 'neurokit' (default), 'pantompkins1985', 'hamilton2002',
                         'christov2004', 'gamboa2008', 'elgendi2010', 'engzeemod2012'
                         or 'kalidas2017'.
        :param wave_method: <str> Can be one of 'dwt' (default) for discrete
                            wavelet transform or 'cwt' for continuous wavelet transform.
        :param min_peaks: <int> Minimum R peaks to be detected to proceed with
                          further calculations.
        :param size: <int> ECG sample size to analyze per loop.
        """
        if not self.lead:
            return

        def read_samples(start, stop=None):
            # Read the lead's signal from file and return samples [start:stop].
            tmap = ECG_TMAPS[f"{self.lead}_value"]
            return tmap.tensor_from_file(tmap, self,
                                         visit=self.visit)[0][start:stop]

        n_segments = len(self.sampling_rate)
        for i in range(n_segments):
            sampling_rate = self.sampling_rate[i][0]
            segment_start = self.sampling_rate[i][1]

            # The last segment extends to the end of the signal; any other
            # segment ends where the next one starts.
            if i == n_segments - 1:
                ecg_signal_size = read_samples(segment_start).shape[0]
            else:
                ecg_signal_size = self.sampling_rate[i + 1][1] - segment_start
            segment_stop = ecg_signal_size + segment_start

            init = segment_start
            end = init + min(size, ecg_signal_size)
            while init < segment_stop:
                ecg_signal = read_samples(init, end)

                # Move to the next window right away: nothing below uses
                # `init`/`end`, so every `continue` can skip freely.
                init = end
                end = min(init + size, segment_stop)

                ecg_signal = nk.ecg_clean(ecg_signal, sampling_rate,
                                          clean_method)
                try:
                    _, r_peaks = nk.ecg_peaks(ecg_signal, sampling_rate,
                                              r_method)
                except IndexError:
                    continue
                if len(r_peaks["ECG_R_Peaks"]) < min_peaks:
                    continue

                # Default delineation first, then `wave_method`; the second
                # result overrides the first on shared keys.
                _, waves_peaks = nk.ecg_delineate(ecg_signal, r_peaks,
                                                  sampling_rate)
                _, waves_peaks_2 = nk.ecg_delineate(
                    ecg_signal,
                    r_peaks,
                    sampling_rate,
                    wave_method,
                )
                waves_peaks.update(waves_peaks_2)

                # Append this window's findings to the stored results.
                for store, found in ((self.r_peaks, r_peaks),
                                     (self.waves_peaks, waves_peaks)):
                    for peak_type in found:
                        if peak_type in store:
                            store[peak_type] = np.append(
                                store[peak_type],
                                found[peak_type],
                            )
                        else:
                            store[peak_type] = found[peak_type]

        for store in (self.r_peaks, self.waves_peaks):
            for peak_type in store:
                store[peak_type] = list(store[peak_type])
Exemplo n.º 15
0
def create_df(dataframe: pd.DataFrame) -> pd.DataFrame:
    # get lengths of signals for each sample
    lengths = []
    width = dataframe.shape[1]

    for row in dataframe.index.tolist():
        temp_width = width
        for item in dataframe.loc[row][::-1]:
            if not pd.isna(item) and isinstance(item, float):
                temp_width -= 1
                break

            temp_width -= 1

        lengths.append(temp_width)

    """
    README
    
    For the following features we measured: [mean, median, 5 % percentile, 95 % percentile, standard deviation]
    R-peak location were retrieved by nk.ecg_peaks
    Q-peak and S-location were retrieved by nk.ecg_delineate
    
    ?_ampl_*        ?-Peak amplitude
    ?_nr_peaks      number of ?-Peaks
    ?_diff_*        Interval between ?-Peaks
    QRS_diff_*      QRS duration
    len_*           length of signal
    Qual_*          quality of signal measured with nk.ecg_quality
    sign_*          signal
    
    Also the output from nk.hrv_time which contains different measurements for the heart rate variation (HRV*) was added
    
    Additionally one 'typical' heartbeat was greated (all length 180):
    
    MN_*            mean signal
    MD_*            median signal
    P5_*            5 % percentile signal
    P95_*           95 % percentile signal
    SD_*            standard deviation of signal
    """

    names = ['R_ampl_mean', 'R_ampl_median', 'R_ampl_perc5', 'R_ampl_perc95', 'R_ampl_sd', 'R_nr_peaks',
             'len_mean', 'len_median', 'len_perc5', 'len_perc95', 'len_sd',
             'sign_mean', 'sign_median', 'sign_perc5', 'sign_perc95', 'sign_sd',
             'Qual_mean', 'Qual_median', 'Qual_perc5', 'Qual_perc95', 'Qual_sd',
             'Q_ampl_mean', 'Q_ampl_median', 'Q_ampl_perc5', 'Q_ampl_perc95', 'Q_ampl_sd', 'Q_nr_peaks',
             'Q_diff_mean', 'Q_diff_median', 'Q_diff_perc5', 'Q_diff_perc95', 'Q_diff_sd',
             'S_ampl_mean', 'S_ampl_median', 'S_ampl_perc5', 'S_ampl_perc95', 'S_ampl_sd', 'S_nr_peaks',
             'S_diff_mean', 'S_diff_median', 'S_diff_perc5', 'S_diff_perc95', 'S_diff_sd',
             'P_ampl_mean', 'P_ampl_median', 'P_ampl_perc5', 'P_ampl_perc95', 'P_ampl_sd', 'P_nr_peaks',
             'T_ampl_mean', 'T_ampl_median', 'T_ampl_perc5', 'T_ampl_perc95', 'T_ampl_sd', 'T_nr_peaks',
             'QRS_diff_mean', 'QRS_diff_median', 'QRS_diff_perc5', 'QRS_diff_perc95', 'QRS_diff_sd',
             'PR_diff_mean', 'PR_diff_median', 'PR_diff_perc5', 'PR_diff_perc95', 'PR_diff_sd',
             'RT_diff_mean', 'RT_diff_median', 'RT_diff_perc5', 'RT_diff_perc95', 'RT_diff_sd',
             'HRV_RMSSD', 'HRV_MeanNN', 'HRV_SDNN', 'HRV_SDSD', 'HRV_CVNN', 'HRV_CVSD', 'HRV_MedianNN',
             'HRV_MadNN', 'HRV_MCVNN', 'HRV_IQRNN', 'HRV_pNN50', 'HRV_pNN20', 'HRV_TINN', 'HRV_HTI',
             'HRV_ULF','HRV_VLF','HRV_LF','HRV_HF','HRV_VHF','HRV_LFHF','HRV_LFn','HRV_HFn', 	'HRV_LnHF',
             'HRV_SD1','HRV_SD2', 'HRV_SD1SD2','HRV_S','HRV_CSI','HRV_CVI','HRV_CSI_Modified', 'HRV_PIP',
             'HRV_IALS','HRV_PSS','HRV_PAS','HRV_GI','HRV_SI','HRV_AI','HRV_PI','HRV_C1d','HRV_C1a','HRV_SD1d',
             'HRV_SD1a','HRV_C2d','HRV_C2a','HRV_SD2d','HRV_SD2a','HRV_Cd','HRV_Ca','HRV_SDNNd','HRV_SDNNa','HRV_ApEn',
             'HRV_SampEn','J_LF','J_HF','J_L/H']


    template_len = 180

    mean_names = ['MN_' + str(index) for index in range(template_len)]
    median_names = ['MD_' + str(index) for index in range(template_len)]
    perc5_names = ['P5_' + str(index) for index in range(template_len)]
    perc95_names = ['P95_' + str(index) for index in range(template_len)]
    sd_names = ['SD_' + str(index) for index in range(template_len)]

    wavelet = 'db3'

    wl_len = int(np.floor((template_len + pywt.Wavelet(wavelet).dec_len - 1) / 2))

    wl_mean_names = ['WLMN_' + str(index) for index in range(2*wl_len)]
    wl_median_names = ['WLMD_' + str(index) for index in range(2*wl_len)]
    wl_perc5_names = ['WLP5_' + str(index) for index in range(2*wl_len)]
    wl_perc95_names = ['WLP95_' + str(index) for index in range(2*wl_len)]
    wl_sd_names = ['WLSD_' + str(index) for index in range(2*wl_len)]

    typical_signal_names = mean_names + median_names + perc5_names + perc95_names + sd_names + wl_mean_names + \
                           wl_median_names + wl_perc5_names + wl_perc95_names + wl_sd_names

    names += typical_signal_names

    data = np.empty([dataframe.shape[0], len(names)])

    iteration = 0
    for row_index, row in dataframe.iterrows():
        print(row_index)

        # Retrieve ECG data
        ecg_signal = row[:lengths[iteration] + 1]
        ecg_signal = nk.ecg_clean(ecg_signal, sampling_rate=SAMPLING_RATE)

        # Find R-peaks
        peaks, info = nk.ecg_peaks(ecg_signal, sampling_rate=SAMPLING_RATE)

        # R amplitude
        R_amplitudes = ecg_signal[info['ECG_R_Peaks']]

        # Check if the signal is flipped
        # Check if we have enough peaks to retrieve more information
        if len(R_amplitudes) > 4:

            _, waves_peak = nk.ecg_delineate(ecg_signal, info, sampling_rate=300, show=False)

            # Q amplitude

            # remove nan values
            Q_amplitudes = [ecg_signal[peak_index] if str(peak_index) != 'nan' else - np.infty for peak_index in
                            waves_peak['ECG_Q_Peaks']]

            if np.sum([1 if np.abs(rpeak) > np.abs(Q_amplitudes[index]) else -1 for index, rpeak in
                       enumerate(R_amplitudes)]) < 0:
                print("flip", row_index)

                ecg_signal = -ecg_signal

                peaks, info = nk.ecg_peaks(ecg_signal, sampling_rate=300)

                # R amplitude
                R_amplitudes = ecg_signal[info['ECG_R_Peaks']]

                if len(R_amplitudes) > 4:
                    _, waves_peak = nk.ecg_delineate(ecg_signal, info, sampling_rate=300, show=False)

        data_temp = []
        if len(R_amplitudes) > 0:
            data_temp = [np.mean(R_amplitudes),
                         np.median(R_amplitudes),
                         np.percentile(R_amplitudes, q=5),
                         np.percentile(R_amplitudes, q=95),
                         np.std(R_amplitudes),
                         len(R_amplitudes)]
        else:
            empty = np.empty([6])
            empty[:] = np.NaN
            data_temp += empty.tolist()

        # length of signal
        data_new = [np.mean(lengths[iteration] / SAMPLING_RATE),
                    np.median(lengths[iteration] / SAMPLING_RATE),
                    np.percentile(lengths[iteration] / SAMPLING_RATE, q=5),
                    np.percentile(lengths[iteration] / SAMPLING_RATE, q=95),
                    np.std(lengths[iteration] / SAMPLING_RATE)]

        data_temp += data_new

        # signal
        data_new = [np.mean(ecg_signal),
                    np.median(ecg_signal),
                    np.percentile(ecg_signal, q=5),
                    np.percentile(ecg_signal, q=95),
                    np.std(ecg_signal)]

        data_temp += data_new

        # Check if we have enough peaks to retrieve more information
        if len(R_amplitudes) > 4:

            quality = nk.ecg_quality(ecg_signal, sampling_rate=SAMPLING_RATE)
            data_new = [np.mean(quality),
                        np.median(quality),
                        np.percentile(quality, q=5),
                        np.percentile(quality, q=95),
                        np.std(quality)]

            data_temp += data_new

            # Delineate the ECG signal
            # “ECG_P_Peaks”, “ECG_Q_Peaks”, “ECG_S_Peaks”, “ECG_T_Peaks”, “ECG_P_Onsets”, “ECG_T_Offsets”

            # _, waves_peak = nk.ecg_delineate(ecg_signal, info, sampling_rate=SAMPLING_RATE, show=False)

            # Q amplitude

            # remove nan values
            Q_peaks = [peak for peak in waves_peak['ECG_Q_Peaks'] if str(peak) != 'nan']

            if len(Q_peaks) > 0:
                Q_amplitudes = ecg_signal[Q_peaks]

                data_new = [np.mean(Q_amplitudes),
                            np.median(Q_amplitudes),
                            np.percentile(Q_amplitudes, q=5),
                            np.percentile(Q_amplitudes, q=95),
                            np.std(Q_amplitudes),
                            len(Q_amplitudes)]

                data_temp += data_new
            else:
                empty = np.empty([6])
                empty[:] = np.NaN
                empty[5] = 0
                data_temp += empty.tolist()

            # more than 1 Q-Peak => can build interval[s]
            if len(Q_peaks) > 1:
                Q_peaks_diff = [(Q_peaks[index + 1] - Q_peaks[index]) / SAMPLING_RATE
                                for index, item in enumerate(Q_peaks[:len(Q_peaks) - 1])]

                # QQ interval

                data_new = [np.mean(Q_peaks_diff),
                            np.median(Q_peaks_diff),
                            np.percentile(Q_peaks_diff, q=5),
                            np.percentile(Q_peaks_diff, q=95),
                            np.std(Q_peaks_diff)]

                data_temp += data_new

            # 0 or 1 Q-peak = no interval => return nan
            else:
                empty = np.empty([5])
                empty[:] = np.NaN
                data_temp += empty.tolist()

            # S amplitude

            # remove nan values
            S_peaks = [peak for peak in waves_peak['ECG_S_Peaks'] if str(peak) != 'nan']

            if len(S_peaks) > 0:
                S_amplitudes = ecg_signal[S_peaks]

                data_new = [np.mean(S_amplitudes),
                            np.median(S_amplitudes),
                            np.percentile(S_amplitudes, q=5),
                            np.percentile(S_amplitudes, q=95),
                            np.std(S_amplitudes),
                            len(S_amplitudes)]

                data_temp += data_new

            else:
                empty = np.empty([6])
                empty[:] = np.NaN
                empty[5] = 0
                data_temp += empty.tolist()

            # more than one S-peak
            if len(S_peaks) > 1:
                S_peaks_diff = [(S_peaks[index + 1] - S_peaks[index]) / SAMPLING_RATE
                                for index, item in enumerate(S_peaks[:len(S_peaks) - 1])]

                # SS interval

                data_new = [np.mean(S_peaks_diff),
                            np.median(S_peaks_diff),
                            np.percentile(S_peaks_diff, q=5),
                            np.percentile(S_peaks_diff, q=95),
                            np.std(S_peaks_diff)]

                data_temp += data_new

            # 0 or 1 S-peak = no interval => return nan
            else:
                empty = np.empty([5])
                empty[:] = np.NaN
                data_temp += empty.tolist()

            P_peaks = [peak for peak in waves_peak['ECG_P_Peaks'] if str(peak) != 'nan']

            if len(P_peaks) > 0:
                P_amplitudes = ecg_signal[P_peaks]

                data_new = [np.mean(P_amplitudes),
                            np.median(P_amplitudes),
                            np.percentile(P_amplitudes, q=5),
                            np.percentile(P_amplitudes, q=95),
                            np.std(P_amplitudes),
                            len(P_amplitudes)]

                data_temp += data_new

            else:
                empty = np.empty([6])
                empty[:] = np.NaN
                empty[5] = 0
                data_temp += empty.tolist()

            T_peaks = [peak for peak in waves_peak['ECG_T_Peaks'] if str(peak) != 'nan']

            if len(T_peaks) > 0:
                T_peaks = ecg_signal[T_peaks]

                data_new = [np.mean(T_peaks),
                            np.median(T_peaks),
                            np.percentile(T_peaks, q=5),
                            np.percentile(T_peaks, q=95),
                            np.std(T_peaks),
                            len(T_peaks)]

                data_temp += data_new

            else:
                empty = np.empty([6])
                empty[:] = np.NaN
                empty[5] = 0
                data_temp += empty.tolist()


            # QRS interval

            QRS_peaks_diff = []

            # compute difference between Q and S peak
            for index in range(len(waves_peak['ECG_Q_Peaks'])):
                if not (np.isnan(waves_peak['ECG_Q_Peaks'][index]) or np.isnan(waves_peak['ECG_S_Peaks'][index])):
                    QRS_peaks_diff.append(
                        (waves_peak['ECG_S_Peaks'][index] - waves_peak['ECG_Q_Peaks'][index]) / SAMPLING_RATE)

            if len(QRS_peaks_diff) > 0:
                data_new = [np.mean(QRS_peaks_diff),
                            np.median(QRS_peaks_diff),
                            np.percentile(QRS_peaks_diff, q=5),
                            np.percentile(QRS_peaks_diff, q=95),
                            np.std(QRS_peaks_diff)]

                data_temp += data_new

            else:
                empty = np.empty([5])
                empty[:] = np.NaN
                data_temp += empty.tolist()

            # PR interval

            PR_peaks_diff = []

            # compute difference between P and R peak
            for index in range(len(waves_peak['ECG_P_Peaks'])):
                if not np.isnan(waves_peak['ECG_P_Peaks'][index]):
                    PR_peaks_diff.append(
                        (info['ECG_R_Peaks'][index] - waves_peak['ECG_P_Peaks'][index]) / SAMPLING_RATE)

            if len(PR_peaks_diff) > 0:
                data_new = [np.mean(PR_peaks_diff),
                            np.median(PR_peaks_diff),
                            np.percentile(PR_peaks_diff, q=5),
                            np.percentile(PR_peaks_diff, q=95),
                            np.std(PR_peaks_diff)]

                data_temp += data_new
            else:
                empty = np.empty([5])
                empty[:] = np.NaN
                data_temp += empty.tolist()

            # RT interval

            RT_peaks_diff = []

            # compute difference between P and R peak
            for index in range(len(waves_peak['ECG_T_Peaks'])):
                if not np.isnan(waves_peak['ECG_T_Peaks'][index]):
                    RT_peaks_diff.append(
                        (waves_peak['ECG_T_Peaks'][index] - info['ECG_R_Peaks'][index]) / SAMPLING_RATE)

            if len(RT_peaks_diff) > 0:
                data_new = [np.mean(RT_peaks_diff),
                            np.median(PR_peaks_diff),
                            np.percentile(RT_peaks_diff, q=5),
                            np.percentile(RT_peaks_diff, q=95),
                            np.std(RT_peaks_diff)]

                data_temp += data_new

            else:
                empty = np.empty([5])
                empty[:] = np.NaN
                data_temp += empty.tolist()

            # Extract clean EDA and SCR features
            # explanation of features:
            # https://neurokit2.readthedocs.io/en/latest/functions.html?highlight=hrv%20time#neurokit2.hrv.hrv_time

            hrv_time = nk.hrv(peaks, sampling_rate=SAMPLING_RATE, show=False)

            data_new = hrv_time.values.tolist()[0]

            data_temp += data_new

            # Jannik
            # http://www.paulvangent.com/2016/03/21/analyzing-a-discrete-heart-rate-signal-using-python-part-2/
            rpeaks = info['ECG_R_Peaks']
            r_interval = [rpeaks[index+1]-rpeaks[index] for index in range(len(rpeaks)-1)]
            RR_x_new = np.linspace(rpeaks[0],rpeaks[-2],rpeaks[-2])
            f = interp1d(rpeaks[:-1], r_interval, kind='cubic')

            n = lengths[iteration] + 1 # Length of the signal
            frq = np.fft.fftfreq(n, d=(1 / SAMPLING_RATE)) # divide the bins into frequency categories
            frq = frq[range(int(n/2))] # Get single side of the frequency range

            Y = np.fft.fft(f(RR_x_new))/n # Calculate FFT

            try:
                Y = Y[range(int(n / 2))]
                lf = np.trapz(abs(Y[(frq >= 0.04) & (frq <= 0.15)]))

                hf = np.trapz(abs(Y[(frq >= 0.16) & (frq <= 0.5)]))  # Do the same for 0.16-0.5Hz (HF)

                data_new = [lf, hf, lf / hf]

                data_temp += data_new
            except IndexError as err:
                print(err)
                data_temp += [None, None, None]

        # if we don't have enough R peaks return vector of nan's
        else:
            empty = np.empty([len(names) - 16 - len(typical_signal_names)])
            empty[:] = np.NaN
            data_temp += empty.tolist()

        # Create a 'typical' heartbeat

        # Scaler = StandardScaler()
        # ecg_signal = Scaler.fit_transform(X=ecg_signal.reshape(-1, 1)).reshape(1, -1)[0].tolist()

        out = ecg.ecg(signal=ecg_signal, sampling_rate=SAMPLING_RATE, show=False)

        mean = np.mean(out['templates'], axis=0)
        median = np.median(out['templates'], axis=0)
        perc5 = np.percentile(out['templates'].astype(np.float64), axis=0, q=5)
        perc95 = np.percentile(out['templates'].astype(np.float64), axis=0, q=95)
        std = np.std(out['templates'].astype(np.float64), axis=0)

        data_new = np.concatenate((mean, median, perc5, perc95, std)).tolist()

        data_temp += data_new

        (wl_mean_cA, wl_mean_cD) = pywt.dwt(np.mean(out['templates'], axis=0),
                                            'db3', 'periodic')
        (wl_median_cA, wl_median_cD) = pywt.dwt(np.median(out['templates'], axis=0),
                                                'db3', 'periodic')
        (wl_perc5_cA, wl_perc5_cD) = pywt.dwt(np.percentile(out['templates'].astype(np.float64), axis=0, q=5),
                                              'db3', 'periodic')
        (wl_perc95_cA, wl_perc95_cD) = pywt.dwt(np.percentile(out['templates'].astype(np.float64), axis=0, q=95),
                                                'db3', 'periodic')
        (wl_sd_cA, wl_sd_cD) = pywt.dwt(np.std(out['templates'].astype(np.float64), axis=0),
                                        'db3', 'periodic')

        data_new = np.concatenate((wl_mean_cA, wl_mean_cD,
                                   wl_median_cA, wl_median_cD,
                                   wl_perc5_cA, wl_perc5_cD,
                                   wl_perc95_cA, wl_perc95_cD,
                                   wl_sd_cA, wl_sd_cD)).tolist()

        data_temp += data_new

        data[iteration] = data_temp

        iteration += 1

    features = pd.DataFrame(data, columns=names)

    return features
Exemplo n.º 16
0
def compute_features(data, condition, sampling_rate=700, window_size=60, window_shift=0.25):
    """Compute per-window ACC, ECG, EDA, EMG, respiration and temperature features.

    Every signal is cleaned once over the whole ``condition`` selection and is
    then summarised over sliding windows of ``window_size`` seconds that
    advance by ``window_shift`` seconds.

    :param data: mapping with the keys ``'ACC'``, ``'ECG'``, ``'EDA'``,
        ``'EMG'``, ``'Resp'`` and ``'Temp'``; every entry is indexed with
        ``condition``. The ACC statistics are unpacked into three values, so
        that entry needs three columns.
    :param condition: index/mask applied to every entry of ``data``; it must
        expose ``.shape`` (used in the final log line).
    :param sampling_rate: sampling frequency in Hz, passed to every
        processing call.
    :param window_size: window length in seconds.
    :param window_shift: step between consecutive windows in seconds.
    :return: tuple ``(chest_df, chest_df_5)``. ``chest_df`` holds one row of
        features per window; ``chest_df_5`` is always an empty DataFrame and
        is only returned so that callers can keep unpacking two values.
    """
    init = time.time()

    # ------------------------------------------------------------------
    # Signal cleaning (done once, on the full selection)
    # ------------------------------------------------------------------
    ## ECG
    ecg_cleaned = nk.ecg_clean(data["ECG"][condition].flatten(), sampling_rate=sampling_rate)

    ## EDA: 5 Hz lowpass filter, then standardisation
    eda_highcut = 5
    eda_filtered = nk.signal_filter(data['EDA'][condition].flatten(), sampling_rate=sampling_rate, highcut=eda_highcut)
    eda_cleaned = nk.standardize(eda_filtered)
    # TODO: not sure about the approach. cvxeda takes longer periods
    eda_phasic_tonic = nk.eda_phasic(eda_cleaned, sampling_rate=sampling_rate)
    # Time axis in seconds, used for the SCL/time correlation below.
    eda_phasic_tonic['t'] = np.arange(eda_phasic_tonic.shape[0]) * (1 / sampling_rate)
    eda_scr_peaks, scr_info = nk.eda_peaks(eda_phasic_tonic['EDA_Phasic'], sampling_rate=sampling_rate)

    ## EMG
    ## Highpass at 50 Hz to remove the DC bias
    ## (https://www.c-motion.com/v3dwiki/index.php/EMG:_Removing_DC_Bias)
    emg_lowcut = 50
    emg_filtered_dc = nk.signal_filter(data['EMG'][condition].flatten(), sampling_rate=sampling_rate, lowcut=emg_lowcut)
    ## 50 Hz lowpass filter, used for the peak features
    emg_highcut = 50
    emg_filtered = nk.signal_filter(data['EMG'][condition].flatten(), sampling_rate=sampling_rate, highcut=emg_highcut)

    ## Resp
    ## Method biosppy is important to apply the 0.1 - 0.35 Hz bandpass filter
    resp_processed, _ = nk.rsp_process(data['Resp'][condition].flatten(), sampling_rate=sampling_rate, method='biosppy')

    print('Elapsed Preprocess', str(timedelta(seconds=time.time() - init)))
    init = time.time()

    # Indexing with ``condition`` does not depend on the window, so do it
    # once instead of once per iteration.
    acc_data = data['ACC'][condition]
    temp_data = data['Temp'][condition]

    window = int(sampling_rate * window_size)
    step = int(sampling_rate * window_shift)

    # Rows are collected in a list and turned into a DataFrame once at the
    # end: ``DataFrame.append`` copies the whole frame on every call and no
    # longer exists in pandas >= 2.0.
    rows = []

    for i in range(0, acc_data.shape[0] - window, step):
        stop = window + i

        # ACC
        w_acc_data = acc_data[i: stop]
        acc_x_mean, acc_y_mean, acc_z_mean = np.mean(w_acc_data, axis=0)  # Feature
        acc_x_std, acc_y_std, acc_z_std = np.std(w_acc_data, axis=0)  # Feature
        acc_x_peak, acc_y_peak, acc_z_peak = np.amax(w_acc_data, axis=0)  # Feature
        acc_x_absint, acc_y_absint, acc_z_absint = np.abs(np.trapz(w_acc_data, axis=0))  # Feature
        # NOTE(review): axis=0 sums over time and leaves one value per axis;
        # confirm that a per-sample x+y+z sum (axis=1) was not intended.
        xyz = np.sum(w_acc_data, axis=0)
        xyz_mean = np.mean(xyz)  # Feature
        xyz_std = np.std(xyz)  # Feature
        xyz_absint = np.abs(np.trapz(xyz))  # Feature

        ## ECG
        w_ecg_cleaned = ecg_cleaned[i: stop]
        _, ecg_info = nk.ecg_peaks(w_ecg_cleaned, sampling_rate=sampling_rate)
        w_ecg_rpeaks = ecg_info['ECG_R_Peaks']
        ecg_nni = pyhrv.tools.nn_intervals(w_ecg_rpeaks)
        # HR
        rs_hr = pyhrv.time_domain.hr_parameters(ecg_nni)
        hr_mean = rs_hr['hr_mean']  # Feature
        hr_std = rs_hr['hr_std']  # Feature
        # HRV-time
        # NOTE(review): hrv_mean and hrv_std are computed but were never
        # written to the output row; confirm whether they should be exported.
        rs_hrv = pyhrv.time_domain.nni_parameters(ecg_nni)
        hrv_mean = rs_hrv['nni_mean']  # Feature
        hrv_std = pyhrv.time_domain.sdnn(ecg_nni)['sdnn']  # Feature
        rs_nn50 = pyhrv.time_domain.nn50(ecg_nni)
        hrv_NN50 = rs_nn50['nn50']  # Feature
        hrv_pNN50 = rs_nn50['pnn50']  # Feature
        hrv_time = nk.hrv_time(w_ecg_rpeaks, sampling_rate=sampling_rate, show=False)
        hrv_TINN = hrv_time.loc[0, 'HRV_TINN']  # Feature
        hrv_rms = pyhrv.time_domain.rmssd(ecg_nni)['rmssd']  # Feature
        # HRV-freq
        # The four band slots are re-purposed here: the slots named
        # ulf/vlf/lf/hf carry the ranges exported below as ULF/LF/HF/VHF.
        # NOTE(review): 'fft_ratio' and 'fft_norm' presumably still refer to
        # the slots named lf/hf, i.e. the ranges exported as HF/VHF - verify
        # against the pyhrv documentation.
        hrv_freq = pyhrv.frequency_domain.welch_psd(ecg_nni, fbands={'ulf': (0.01, 0.04), 'vlf': (0.04, 0.15), 'lf': (0.15, 0.4), 'hf': (0.4, 1)}, mode='dev')
        hrv_freq = hrv_freq[0]
        hrv_ULF = hrv_freq['fft_abs'][0]  # Feature
        hrv_LF = hrv_freq['fft_abs'][1]  # Feature
        hrv_HF = hrv_freq['fft_abs'][2]  # Feature
        hrv_VHF = hrv_freq['fft_abs'][3]  # Feature
        hrv_lf_hf_ratio = hrv_freq['fft_ratio']  # Feature
        hrv_f_sum = hrv_freq['fft_total']  # Feature
        hrv_rel_ULF = hrv_freq['fft_rel'][0]  # Feature
        hrv_rel_LF = hrv_freq['fft_rel'][1]  # Feature
        hrv_rel_HF = hrv_freq['fft_rel'][2]  # Feature
        hrv_rel_VHF = hrv_freq['fft_rel'][3]  # Feature
        hrv_LFn = hrv_freq['fft_norm'][0]  # Feature
        hrv_HFn = hrv_freq['fft_norm'][1]  # Feature

        # EDA
        w_eda_data = eda_cleaned[i: stop]
        w_eda_phasic_tonic = eda_phasic_tonic[i: stop]
        w_eda_tonic = w_eda_phasic_tonic['EDA_Tonic']
        w_eda_phasic = w_eda_phasic_tonic['EDA_Phasic']

        eda_mean = np.mean(w_eda_data)  # Feature
        eda_std = np.std(w_eda_data)  # Feature
        eda_min = np.amin(w_eda_data)  # Feature
        eda_max = np.amax(w_eda_data)  # Feature
        eda_slope = get_slope(w_eda_data)  # Feature
        # dynamic range: https://en.wikipedia.org/wiki/Dynamic_range
        eda_drange = eda_max / eda_min  # Feature
        eda_scl_mean = np.mean(w_eda_tonic)  # Feature
        eda_scl_std = np.std(w_eda_tonic)  # Feature
        eda_scr_mean = np.mean(w_eda_phasic)  # Feature
        eda_scr_std = np.std(w_eda_phasic)  # Feature
        eda_corr_scl_t = nk.cor(w_eda_tonic, w_eda_phasic_tonic['t'], show=False)  # Feature

        eda_scr_no = eda_scr_peaks['SCR_Peaks'][i: stop].sum()  # Feature
        # Sum amplitudes in SCR signal, ignoring NaN entries.
        # NOTE(review): scr_info is sliced with sample indices here; confirm
        # that scr_info['SCR_Amplitude'] is per-sample and not per-peak.
        ampl = scr_info['SCR_Amplitude'][i: stop]
        eda_ampl_sum = np.nansum(ampl)  # Feature
        # TODO: eda_t_sum

        # SCR area approximated as 0.5 * sum(height * width) over the peaks.
        scr_peaks, scr_properties = scisig.find_peaks(w_eda_phasic, height=0)
        width_scr = scisig.peak_widths(w_eda_phasic, scr_peaks, rel_height=0)
        ht_scr = scr_properties['peak_heights']
        eda_scr_area = 0.5 * np.matmul(ht_scr, width_scr[1])  # Feature

        # EMG
        ## 5sec
        w_emg_data = emg_filtered_dc[i: stop]
        emg_mean = np.mean(w_emg_data)  # Feature
        emg_std = np.std(w_emg_data)  # Feature
        emg_min = np.amin(w_emg_data)
        emg_max = np.amax(w_emg_data)
        emg_drange = emg_max / emg_min  # Feature
        emg_absint = np.abs(np.trapz(w_emg_data))  # Feature
        emg_median = np.median(w_emg_data)  # Feature
        emg_perc_10 = np.percentile(w_emg_data, 10)  # Feature
        emg_perc_90 = np.percentile(w_emg_data, 90)  # Feature
        emg_peak_freq, emg_mean_freq, emg_median_freq = get_freq_features(w_emg_data)  # Features
        # TODO: PSD -> energy in seven bands

        ## 60 sec
        peaks, properties = scisig.find_peaks(emg_filtered[i: stop], height=0)
        emg_peak_heights = properties['peak_heights']
        emg_peak_no = peaks.shape[0]
        emg_peak_amp_mean = np.mean(emg_peak_heights)  # Feature
        emg_peak_amp_std = np.std(emg_peak_heights)  # Feature
        emg_peak_amp_sum = np.sum(emg_peak_heights)  # Feature
        emg_peak_amp_max = np.abs(np.amax(emg_peak_heights))
        # https://www.researchgate.net/post/How_Period_Normalization_and_Amplitude_normalization_are_performed_in_ECG_Signal
        emg_peak_amp_norm_sum = np.sum(emg_peak_heights / emg_peak_amp_max)  # Feature

        # Resp
        w_resp_data = resp_processed[i: stop]
        resp_phase = w_resp_data['RSP_Phase']
        ## Inhalation / Exhalation duration analysis: lengths (in samples) of
        ## the consecutive runs of each phase value.  Both keys exist up front
        ## so a window without any phase information yields NaN statistics
        ## instead of a KeyError.
        duration = {0: [], 1: []}
        current = None
        count = 0
        for j in resp_phase.dropna().to_numpy():
            if current is None:
                # The very first sample only sets the starting phase and is
                # not counted; the run still open at the end of the window is
                # never stored.
                current = int(j)
                continue
            if j != current:
                duration[current].append(count)
                current = int(j)
                count = 0
            count += 1
        resp_inhal_mean = np.mean(duration[1])  # Feature
        resp_inhal_std = np.std(duration[1])  # Feature
        resp_exhal_mean = np.mean(duration[0])  # Feature
        resp_exhal_std = np.std(duration[0])  # Feature
        resp_inhal_duration = resp_phase[resp_phase == 1].count()
        resp_exhal_duration = resp_phase[resp_phase == 0].count()
        resp_ie_ratio = resp_inhal_duration / resp_exhal_duration  # Feature
        resp_duration = resp_inhal_duration + resp_exhal_duration  # Feature
        resp_stretch = w_resp_data['RSP_Amplitude'].max() - w_resp_data['RSP_Amplitude'].min()  # Feature
        resp_breath_rate = len(duration[1])  # Feature
        ## Volume: area under the curve of the inspiration phase on a respiratory cycle
        resp_peaks, resp_properties = scisig.find_peaks(w_resp_data['RSP_Clean'], height=0)
        resp_width = scisig.peak_widths(w_resp_data['RSP_Clean'], resp_peaks, rel_height=0)
        resp_ht = resp_properties['peak_heights']
        resp_volume = 0.5 * np.matmul(resp_ht, resp_width[1])  # Feature

        # Temp
        w_temp_data = temp_data[i: stop].flatten()
        temp_mean = np.mean(w_temp_data)  # Feature
        temp_std = np.std(w_temp_data)  # Feature
        temp_min = np.amin(w_temp_data)  # Feature
        temp_max = np.amax(w_temp_data)  # Feature
        temp_drange = temp_max / temp_min  # Feature
        temp_slope = get_slope(w_temp_data.ravel())  # Feature

        # The column names (including the 'ACC_xzy_mean' spelling) are kept
        # exactly as they were because callers may select columns by name.
        rows.append({
            'ACC_x_mean': acc_x_mean, 'ACC_y_mean': acc_y_mean, 'ACC_z_mean': acc_z_mean, 'ACC_xzy_mean': xyz_mean,
            'ACC_x_std': acc_x_std, 'ACC_y_std': acc_y_std, 'ACC_z_std': acc_z_std, 'ACC_xyz_std': xyz_std,
            'ACC_x_absint': acc_x_absint, 'ACC_y_absint': acc_y_absint, 'ACC_z_absint': acc_z_absint, 'ACC_xyz_absint': xyz_absint,
            'ACC_x_peak': acc_x_peak, 'ACC_y_peak': acc_y_peak, 'ACC_z_peak': acc_z_peak,
            'ECG_hr_mean': hr_mean, 'ECG_hr_std': hr_std, 'ECG_hrv_NN50': hrv_NN50, 'ECG_hrv_pNN50': hrv_pNN50, 'ECG_hrv_TINN': hrv_TINN, 'ECG_hrv_RMS': hrv_rms,
            'ECG_hrv_ULF': hrv_ULF, 'ECG_hrv_LF': hrv_LF, 'ECG_hrv_HF': hrv_HF, 'ECG_hrv_VHF': hrv_VHF, 'ECG_hrv_LFHF_ratio': hrv_lf_hf_ratio, 'ECG_hrv_f_sum': hrv_f_sum,
            'ECG_hrv_rel_ULF': hrv_rel_ULF, 'ECG_hrv_rel_LF': hrv_rel_LF, 'ECG_hrv_rel_HF': hrv_rel_HF, 'ECG_hrv_rel_VHF': hrv_rel_VHF, 'ECG_hrv_LFn': hrv_LFn, 'ECG_hrv_HFn': hrv_HFn,
            'EDA_mean': eda_mean, 'EDA_std': eda_std, 'EDA_min': eda_min, 'EDA_max': eda_max, 'EDA_slope': eda_slope,
            # 'EDA_SCL_std' used to be filled with the SCL mean by mistake.
            'EDA_drange': eda_drange, 'EDA_SCL_mean': eda_scl_mean, 'EDA_SCL_std': eda_scl_std, 'EDA_SCR_mean': eda_scr_mean, 'EDA_SCR_std': eda_scr_std,
            'EDA_corr_SCL_t': eda_corr_scl_t, 'EDA_SCR_no': eda_scr_no, 'EDA_ampl_sum': eda_ampl_sum, 'EDA_scr_area': eda_scr_area,
            'EMG_mean': emg_mean, 'EMG_std': emg_std, 'EMG_drange': emg_drange, 'EMG_absint': emg_absint, 'EMG_median': emg_median, 'EMG_perc_10': emg_perc_10,
            'EMG_perc_90': emg_perc_90, 'EMG_peak_freq': emg_peak_freq, 'EMG_mean_freq': emg_mean_freq, 'EMG_median_freq': emg_median_freq,
            'EMG_peak_no': emg_peak_no, 'EMG_peak_amp_mean':  emg_peak_amp_mean, 'EMG_peak_amp_std':  emg_peak_amp_std, 'EMG_peak_amp_sum':  emg_peak_amp_sum,
            'EMG_peak_amp_norm_sum':  emg_peak_amp_norm_sum,
            'RESP_inhal_mean': resp_inhal_mean, 'RESP_inhal_std': resp_inhal_std, 'RESP_exhal_mean': resp_exhal_mean, 'RESP_exhal_std': resp_exhal_std,
            'RESP_ie_ratio': resp_ie_ratio, 'RESP_duration': resp_duration, 'RESP_stretch': resp_stretch, 'RESP_breath_rate': resp_breath_rate, 'RESP_volume': resp_volume,
            'TEMP_mean': temp_mean, 'TEMP_std': temp_std, 'TEMP_min': temp_min, 'TEMP_max': temp_max, 'TEMP_drange': temp_drange, 'TEMP_slope': temp_slope,
        })

    chest_df = pd.DataFrame(rows)
    chest_df_5 = pd.DataFrame()  # For 5 sec window size (never populated)

    print('Elapsed Process', condition.shape[0], str(timedelta(seconds=time.time() - init)))
    return chest_df, chest_df_5
Exemplo n.º 17
0
    def corr_and_featurize_ecg(self, recording, sample_freq, r_peaks, s_peaks,
                               q_peaks, p_peaks, t_peaks):
        """
        Automatically derives features from ECG-files (only .dat files for now)
        Args:
            R-peaks
            P-peaks
            T-peaks
        
            features (numpy array of str): an array of ECG-filenames in directory
            labels (numpy array): an array of labels/diagnosis
            directory (str): path to the features
            demographical_data (DataFrame): A DataFrame containing feature name, age and gender

        Returns:
            features_out (DataFrame): A DataFrame with features for all ECG-records
        """
        def interval_calc_simple(first_peak, second_peak, sample_freq):
            try:
                mean_interval = round((second_peak - first_peak).mean(), 5)
            except:
                mean_interval = float("NaN")
            try:
                std_interval = round((second_peak - first_peak).std(), 5)
            except:
                std_interval = float("NaN")
            return mean_interval, std_interval

        feature_list = []
        feature_name = []

        if len(r_peaks) and len(q_peaks) and len(s_peaks) and len(
                p_peaks) and len(t_peaks) < 3:
            try:
                temp_data = nk.ecg_process(recording, sample_freq)[0]
                r_peaks = np.where(temp_data['ECG_R_Peaks'] == 1)[0]
                p_peaks = np.where(temp_data['ECG_P_Peaks'] == 1)[0]
                q_peaks = np.where(temp_data['ECG_Q_Peaks'] == 1)[0]
                s_peaks = np.where(temp_data['ECG_S_Peaks'] == 1)[0]
                t_peaks = np.where(temp_data['ECG_T_Peaks'] == 1)[0]
                p_onset = np.where(temp_data['ECG_P_Onsets'] == 1)[0]
                t_offset = np.where(temp_data['ECG_T_Offsets'] == 1)[0]
                clean_rec = temp_data['ECG_Clean']

                analysis = True
            except:
                analysis = False
                r_peaks = np.array([1, 2])
                p_peaks = np.array([1, 2])
                q_peaks = np.array([1, 2])
                s_peaks = np.array([1, 2])
                t_peaks = np.array([1, 2])

        else:
            analysis = True
            clean_rec = nk.ecg_clean(recording)
            try:
                r_peaks = processing.peaks.correct_peaks(clean_rec,
                                                         r_peaks,
                                                         search_radius=25,
                                                         smooth_window_size=7,
                                                         peak_dir='compare')
            except:
                r_peaks = r_peaks

            try:
                q_peaks = processing.peaks.correct_peaks(clean_rec,
                                                         q_peaks,
                                                         search_radius=25,
                                                         smooth_window_size=7,
                                                         peak_dir='compare')
            except:
                q_peaks = q_peaks

            try:
                s_peaks = processing.peaks.correct_peaks(clean_rec,
                                                         s_peaks,
                                                         search_radius=25,
                                                         smooth_window_size=7,
                                                         peak_dir='compare')
            except:
                s_peaks = s_peaks

            try:
                t_peaks = processing.peaks.correct_peaks(clean_rec,
                                                         t_peaks,
                                                         search_radius=25,
                                                         smooth_window_size=7,
                                                         peak_dir='compare')
            except:
                t_peaks = t_peaks

            try:
                p_peaks = processing.peaks.correct_peaks(clean_rec,
                                                         p_peaks,
                                                         search_radius=25,
                                                         smooth_window_size=7,
                                                         peak_dir='compare')
            except:
                p_peaks = p_peaks

        if self.rpeak_int == True:
            feature_name.append("mean_rr_interval")
            feature_name.append("sd_rr_interval")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                feature_list.append((np.diff(r_peaks) / sample_freq).mean())
                feature_list.append((np.diff(r_peaks) / sample_freq).std())

        if self.rpeak_amp == True:
            feature_name.append("mean_r_peak")
            feature_name.append("sd_r_peak")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                feature_list.append(recording[r_peaks].mean())
                feature_list.append(recording[r_peaks].std())

        if self.ppeak_int == True:
            feature_name.append("mean_pp_interval")
            feature_name.append("sd_pp_interval")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                feature_list.append((np.diff(p_peaks) / sample_freq).mean())
                feature_list.append((np.diff(p_peaks) / sample_freq).std())

        if self.ppeak_amp == True:
            feature_name.append("mean_p_peak")
            feature_name.append("sd_p_peak")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                feature_list.append(clean_rec[p_peaks].mean())
                feature_list.append(clean_rec[p_peaks].std())

        if self.tpeak_int == True:
            feature_name.append("mean_tt_interval")
            feature_name.append("sd_tt_interval")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                feature_list.append((np.diff(t_peaks) / sample_freq).mean())
                feature_list.append((np.diff(t_peaks) / sample_freq).std())

        if self.tpeak_amp == True:
            feature_name.append("mean_t_peak")
            feature_name.append("sd_t_peak")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                feature_list.append(clean_rec[t_peaks].mean())
                feature_list.append(clean_rec[t_peaks].std())

        if self.qpeak_int == True:
            feature_name.append("mean_qq_interval")
            feature_name.append("sd_qq_interval")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                feature_list.append((np.diff(q_peaks) / sample_freq).mean())
                feature_list.append((np.diff(q_peaks) / sample_freq).std())

        if self.qpeak_amp == True:
            feature_name.append("mean_q_peak")
            feature_name.append("sd_q_peak")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                feature_list.append(clean_rec[q_peaks].mean())
                feature_list.append(clean_rec[q_peaks].std())

        if self.speak_int == True:
            feature_name.append("mean_q_peak")
            feature_name.append("sd_q_peak")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                feature_list.append((np.diff(s_peaks) / sample_freq).mean())
                feature_list.append((np.diff(s_peaks) / sample_freq).std())

        if self.speak_amp == True:
            feature_name.append("mean_s_peak")
            feature_name.append("sd_s_peak")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                feature_list.append(clean_rec[s_peaks].mean())
                feature_list.append(clean_rec[s_peaks].std())

        if self.qrs_duration == True:
            feature_name.append("qrs_mean")
            feature_name.append("qrs_std")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                qrs_mean, qrs_std = interval_calc_simple(
                    q_peaks, s_peaks, sample_freq)
                feature_list.append(qrs_mean)
                feature_list.append(qrs_std)

        if self.qt_duration == True:
            feature_name.append("qt_mean")
            feature_name.append("qt_std")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                qt_mean, qt_std = interval_calc_simple(q_peaks, t_peaks,
                                                       sample_freq)
                feature_list.append(qt_mean)
                feature_list.append(qt_std)

        if self.pr_duration == True:
            feature_name.append("pr_mean")
            feature_name.append("pr_std")
            if analysis == False:
                feature_list.append(float("nan"))
                feature_list.append(float("nan"))
            elif analysis == True:
                pr_mean, pr_std = interval_calc_simple(p_peaks, r_peaks,
                                                       sample_freq)
                feature_list.append(pr_mean)
                feature_list.append(pr_std)

        feature_list = np.asarray(feature_list)
        feature_name = np.asarray(feature_name)

        return feature_list, feature_name, [
            p_peaks, q_peaks, r_peaks, s_peaks, t_peaks
        ]
Exemplo n.º 18
0
def _wave_indices(raw_positions):
    """Return the non-NaN entries of *raw_positions* as a list of ints."""
    positions = np.asarray(raw_positions, dtype=float)
    return [int(p) for p in positions[~np.isnan(positions)]]


def _mean_or_nan(values):
    """Return the mean of *values*, or NaN when *values* is empty."""
    values = np.asarray(values, dtype=float)
    return np.mean(values) if values.size else np.nan


def get_12ECG_features_labels(data, header_data):
    """
    Build a flat feature list (plus diagnosis label) from one ECG record.

    Only ``data[1]`` and its gain are analysed; the other leads are ignored.

    :param data: indexable collection of lead signals; ``data[1]`` is used
        (assumed to be a 1-D numpy array -- TODO confirm against caller)
    :param header_data: header lines of the record. The first line is split
        on spaces and fields 1 and 2 are read as the number of leads and the
        sampling frequency. For each lead line that follows, the part of
        field 2 before ``/`` is read as the integer gain. Lines starting with
        ``#Age``, ``#Sex`` and ``#Dx`` supply age, sex and label.
    :return: list ``[age, sex, fmax, mean_RR, mean_R_Peaks, mean_T_Peaks,
        mean_P_Peaks, mean_Q_Peaks, mean_S_Peaks, median_RR, median_R_Peaks,
        std_RR, std_R_Peaks, var_RR, var_R_Peaks, skew_RR, skew_R_Peaks,
        kurt_RR, kurt_R_Peaks, mean_P_Onsets, mean_T_offsets, rmssd, label]``

    If the header has no ``#Age``, ``#Sex`` or ``#Dx`` line, the matching
    local is never bound and building the feature list fails.
    If wave delineation raises ``ValueError``, the wave-based features
    (T/P/Q/S peak means, P-onset and T-offset means) are returned as NaN.
    """
    record_fields = header_data[0].split(' ')
    num_leads = int(record_fields[1])
    sample_Fs = int(record_fields[2])

    gain_lead = np.zeros(num_leads)
    for ii in range(num_leads):
        lead_fields = header_data[ii + 1].split(' ')
        gain_lead[ii] = int(lead_fields[2].split('/')[0])

    # For testing, a mean age of 57 is substituted when the age is NaN.
    # This value will change as more data is being released.
    for iline in header_data:
        if iline.startswith('#Age'):
            tmp_age = iline.split(': ')[1].strip()
            age = int(tmp_age if tmp_age != 'NaN' else 57)
        elif iline.startswith('#Sex'):
            sex = 1 if iline.split(': ')[1].strip() == 'Female' else 0
        elif iline.startswith('#Dx'):
            # Only the first comma-separated diagnosis is kept.
            label = iline.split(': ')[1].split(',')[0]

    signal = data[1]
    gain = gain_lead[1]

    # Dominant frequency: frequency bin with the largest FFT magnitude in
    # the first half of the spectrum.
    N = len(signal)
    half = N // 2
    sp = sample_Fs / N    # spectral resolution
    magnitude = np.abs(np.fft.fft(signal * gain)[:half])
    # `num` must be an integer; argmax picks the first bin on ties.
    ff = np.linspace(0, (N / 2) * sp, half)
    fmax = float(ff[np.argmax(magnitude)])

    # presumably `peaks` are R-peak amplitudes and `idx` their sample
    # indices -- confirm against detect_peaks
    peaks, idx = detect_peaks(signal, sample_Fs, gain)
    # NOTE(review): the "RR" statistics are taken over idx scaled by
    # 1000 / sample_Fs, not over successive differences of idx -- confirm
    # this is intended.
    rr_ms = idx / sample_Fs * 1000
    r_amp = peaks * gain

    mean_RR = np.mean(rr_ms)
    mean_R_Peaks = np.mean(r_amp)

    median_RR = np.median(rr_ms)
    median_R_Peaks = np.median(r_amp)

    std_RR = np.std(rr_ms)
    std_R_Peaks = np.std(r_amp)

    var_RR = stats.tvar(rr_ms)
    var_R_Peaks = stats.tvar(r_amp)

    skew_RR = stats.skew(rr_ms)
    skew_R_Peaks = stats.skew(r_amp)

    kurt_RR = stats.kurtosis(rr_ms)
    kurt_R_Peaks = stats.kurtosis(r_amp)

    # RMSSD (HRV), computed on sample-index differences.
    rmssd = np.sqrt(np.mean(np.square(np.diff(idx))))

    # Delineate the remaining waves on the cleaned, gain-scaled signal.
    ecg_signal = nk.ecg_clean(signal * gain, sampling_rate=sample_Fs, method="biosppy")
    _, rpeaks = nk.ecg_peaks(ecg_signal, sampling_rate=sample_Fs)
    try:
        _, waves_peak = nk.ecg_delineate(ecg_signal, rpeaks, sampling_rate=sample_Fs)
    except ValueError:
        print('Exception raised!')
        # Fall back to "no waves found" so the features below become NaN
        # instead of failing on unbound names.
        waves_peak = {}

    # Mean amplitude at each wave peak, read from the raw (unscaled) signal.
    wave_means = {}
    for key in ('ECG_T_Peaks', 'ECG_P_Peaks', 'ECG_Q_Peaks', 'ECG_S_Peaks'):
        samples = _wave_indices(waves_peak.get(key, []))
        wave_means[key] = _mean_or_nan([signal[w] for w in samples])
    mean_T_Peaks = wave_means['ECG_T_Peaks']
    mean_P_Peaks = wave_means['ECG_P_Peaks']
    mean_Q_Peaks = wave_means['ECG_Q_Peaks']
    mean_S_Peaks = wave_means['ECG_S_Peaks']

    # P onsets / T offsets, scaled by 1000 / sample_Fs.
    # NOTE: NaN entries are not filtered here, so a single missing onset or
    # offset makes the corresponding mean NaN.
    p_onsets = np.asarray(waves_peak.get('ECG_P_Onsets', []), dtype=float)
    mean_P_Onsets = _mean_or_nan(p_onsets / sample_Fs * 1000)

    t_offsets = np.asarray(waves_peak.get('ECG_T_Offsets', []), dtype=float)
    mean_T_offsets = _mean_or_nan(t_offsets / sample_Fs * 1000)

    features = [
        age, sex, fmax,
        mean_RR, mean_R_Peaks,
        mean_T_Peaks, mean_P_Peaks, mean_Q_Peaks, mean_S_Peaks,
        median_RR, median_R_Peaks,
        std_RR, std_R_Peaks,
        var_RR, var_R_Peaks,
        skew_RR, skew_R_Peaks,
        kurt_RR, kurt_R_Peaks,
        mean_P_Onsets, mean_T_offsets,
        rmssd, label,
    ]

    return features
Exemplo n.º 19
0
def get_HRVs_values(data, header_data):
    """
    Compute time-domain HRV metrics plus P-wave variability for one record.

    Only ``data[1]`` and its gain are analysed; the other leads are ignored.

    :param data: indexable collection of lead signals; ``data[1]`` is used
        and is fancy-indexed with an integer array, so it is assumed to be a
        numpy array -- TODO confirm against caller
    :param header_data: header lines of the record. The first line is split
        on spaces and fields 1 and 2 are read as the number of leads and the
        sampling frequency. For each lead line that follows, the part of
        field 2 before ``/`` is read as the integer gain. Lines starting with
        ``#Age`` and ``#Dx`` supply age and label.
    :return: the object returned by ``nk.hrv_time`` with four extra entries:
        ``var_P_time``, ``var_P_peaks``, ``age`` and ``label``

    If the header has no ``#Age`` or ``#Dx`` line, the matching local is
    never bound and the final assignments fail.
    If wave delineation raises ``ValueError``, or fewer than two P peaks (for
    ``var_P_peaks``) or P-P differences (for ``var_P_time``) are available,
    the corresponding variance is NaN.
    """
    record_fields = header_data[0].split(' ')
    num_leads = int(record_fields[1])
    sample_Fs = int(record_fields[2])

    gain_lead = np.zeros(num_leads)
    for ii in range(num_leads):
        lead_fields = header_data[ii + 1].split(' ')
        gain_lead[ii] = int(lead_fields[2].split('/')[0])

    # For testing, a mean age of 57 is substituted when the age is NaN.
    # This value will change as more data is being released.
    for iline in header_data:
        if iline.startswith('#Age'):
            tmp_age = iline.split(': ')[1].strip()
            age = int(tmp_age if tmp_age != 'NaN' else 57)
        elif iline.startswith('#Dx'):
            # Only the first comma-separated diagnosis is kept.
            label = iline.split(': ')[1].split(',')[0]

    signal = data[1]
    gain = gain_lead[1]

    ecg_signal = nk.ecg_clean(signal * gain, sampling_rate=sample_Fs, method="biosppy")
    _, rpeaks = nk.ecg_peaks(ecg_signal, sampling_rate=sample_Fs)
    hrv_time = nk.hrv_time(rpeaks, sampling_rate=sample_Fs)

    try:
        _, waves_peak = nk.ecg_delineate(ecg_signal, rpeaks, sampling_rate=sample_Fs)
        p_peaks = waves_peak['ECG_P_Peaks']
    except ValueError:
        print('Exception raised!')
        # Fall back to "no P peaks" so the variances below become NaN
        # instead of failing on an unbound name.
        p_peaks = []

    # Drop undetected (NaN) peaks and keep integer sample indices; the
    # explicit int dtype keeps an empty result usable as an index array.
    p_peaks = np.asarray(p_peaks, dtype=float)
    p_idx = p_peaks[~np.isnan(p_peaks)].astype(int)

    # Differences between consecutive P-peak times (index / sample_Fs).
    p_diff = np.diff(p_idx / sample_Fs)

    # A sample variance needs at least two observations.
    hrv_time['var_P_time'] = stats.tvar(p_diff) if p_diff.size > 1 else np.nan
    # P-peak amplitudes are read from the raw (unscaled) signal.
    hrv_time['var_P_peaks'] = stats.tvar(signal[p_idx]) if p_idx.size > 1 else np.nan

    hrv_time['age'] = age
    hrv_time['label'] = label

    return hrv_time