def missing_imputation(
    tm: TensorMap,
    hd5: h5py.File,
    visit: str,
    indices: List[int],
    period: str,
    tensor: np.ndarray,
    imputation_type: str = None,
    **kwargs,
):
    """
    Replace missing values (empty tensor or NaNs) of a computed feature.

    :param tm: <TensorMap> Tensor map the feature was computed from. Its
               <tensor_from_file> is used to read the signal again when
               <imputation_type> is "sample_and_hold".
    :param hd5: <h5py.File> File forwarded to <tm.tensor_from_file>.
    :param visit: <str> Forwarded to <tm.tensor_from_file> as <visits>.
    :param indices: <List[int]> Positions of the samples of the window the
                    feature was computed on.
    :param period: <str> With "pre" the held sample is the last valid one
                   found before position <indices[-1]>; with any other value
                   it is the first valid one from position <indices[0]> on.
    :param tensor: <np.ndarray> Feature to impute. Scalars are accepted too.
    :param imputation_type: <str> None or empty: <tensor> is returned
                            untouched. "sample_and_hold": an empty or all-NaN
                            <tensor> is replaced by a valid neighbouring
                            sample. Any other value: NaNs are replaced by
                            ICU_TMAPS_METADATA[name][imputation_type], where
                            name is <tm.name> without "_<imputation_type>".
    :return: The imputed tensor. With "sample_and_hold" the result is NaN
             when no valid sample exists or when <indices> is empty.
    """
    if not imputation_type:
        return tensor

    # Scalars (np.min(...), a sample count, ...) have no len(); only arrays
    # can be empty.
    is_empty = np.ndim(tensor) > 0 and len(tensor) == 0

    if imputation_type == "sample_and_hold":
        if not (is_empty or np.isnan(tensor).all()):
            return tensor
        if len(indices) == 0:
            # Without a window there is no position to look around.
            return tensor

        # NOTE(review): the signal is sliced along its first axis, which
        # presumes a 1-D value signal -- confirm for *_timeseries maps.
        signal = tm.tensor_from_file(tm, hd5, visits=visit, **kwargs)[0]
        if period == "pre":
            candidates = signal[: indices[-1]]
            position = -1
        else:
            candidates = signal[indices[0]:]
            position = 0

        candidates = candidates[~np.isnan(candidates)]
        if candidates.size == 0:
            return np.array([np.nan])
        return np.array([candidates[position]])

    name = tm.name.replace(f"_{imputation_type}", "")
    imputation = ICU_TMAPS_METADATA[name][imputation_type]
    if is_empty:
        return np.array([imputation])
    return np.nan_to_num(tensor, nan=imputation)
def get_sliding_windows(
    hd5,
    window: int,
    step: int,
    event_tm_1: TensorMap,
    event_tm_2: TensorMap,
    visit_tm: TensorMap,
    buffer_adm_time: int = 24,
    **kwargs,
):
    """
    Create a sliding window from the time associated to <event_tm_1> to
    <event_tm_2> with step size <step> and window length <window>.

    The result is memoised per file in <get_sliding_windows.windows_cache>,
    keyed by <hd5.id>.

    :param hd5: File object; <hd5.id> is used as cache key and the object is
                forwarded to every <tensor_from_file> call.
    :param window: <int> Window length. It is multiplied by 60 * 60 before
                   being added to the first event time.
    :param step: <int> Distance between consecutive windows, multiplied by
                 60 * 60 as well.
    :param event_tm_1: <TensorMap> Provides the time the windows start from.
    :param event_tm_2: <TensorMap> Provides the (exclusive) end time.
    :param visit_tm: <TensorMap> Provides the visit passed on as <visits>.
    :param buffer_adm_time: <int> Extra offset added after the first event,
                            multiplied by 60 * 60.
    :return: <np.ndarray> Window times, from
             event_1 + (buffer_adm_time + window) * 3600 up to event_2.
    :raises ValueError: If no window fits between both events. This is
                        raised on every call, including cache hits.
    """
    if not hasattr(get_sliding_windows, "windows_cache"):
        get_sliding_windows.windows_cache = {}
    cache = get_sliding_windows.windows_cache

    # NOTE(review): only hd5.id is part of the key, so a later call for the
    # same file with different window/step/tmaps gets the first result.
    windows = cache.get(hd5.id)
    if windows is None:
        visit = visit_tm.tensor_from_file(visit_tm, hd5, **kwargs)[0]

        # NOTE(review): unix_dates=True is requested for the first event
        # only -- confirm event_tm_2 already yields comparable times.
        start_event = event_tm_1.tensor_from_file(
            event_tm_1,
            hd5,
            visits=visit,
            unix_dates=True,
            **kwargs,
        )[0][0]
        end_event = event_tm_2.tensor_from_file(
            event_tm_2,
            hd5,
            visits=visit,
            **kwargs,
        )[0][0]

        windows = np.arange(
            start_event + (buffer_adm_time + window) * 60 * 60,
            end_event,
            step * 60 * 60,
        )
        cache[hd5.id] = windows

    # Checked after the cache lookup so that a cached empty result raises
    # again instead of being handed back silently.
    if windows.size == 0:
        raise ValueError(
            "It is not possible to compute a sliding window with the given parameters.",
        )
    return windows
def extract_features_tmaps(
    self,
    signal_tm: TensorMap,
    clean_method: str = "neurokit",
    r_method: str = "neurokit",
    wave_method: str = "dwt",
    min_peaks: int = 200,
):
    """
    Function to extract the ecg features using the neurokit2 package. That
    is the P, Q, R, S and T peaks and the P, QRS and T waves onsets and
    offsets. The result is saved internally, in <self.r_peaks> and
    <self.waves_peaks>, as lists.

    The signal is processed in segments: every entry of
    <self.sampling_rate> holds the sampling rate (position 0) and the index
    of the first sample (position 1) of one segment. Segments where the R
    peak detection raises an IndexError, or where fewer than <min_peaks>
    R peaks are found, are skipped.

    :param signal_tm: <TensorMap>
    :param clean_method: <str> The processing pipeline to apply. Can be one
                         of 'neurokit' (default), 'biosppy',
                         'pantompkins1985', 'hamilton2002', 'elgendi2010',
                         'engzeemod2012'.
    :param r_method: <str> The algorithm to be used for R-peak detection.
                     Can be one of 'neurokit' (default), 'pantompkins1985',
                     'hamilton2002', 'christov2004', 'gamboa2008',
                     'elgendi2010', 'engzeemod2012' or 'kalidas2017'.
    :param wave_method: <str> Can be one of 'dwt' (default) for discrete
                        wavelet transform or 'cwt' for continuous wavelet
                        transform.
    :param min_peaks: <int> Minimum R peaks to be detected to proceed with
                      further calculations.
    """
    segments = self.sampling_rate
    last_segment = len(segments) - 1

    # Read the signal once; it does not change between segments.
    full_signal = None
    if len(segments) > 0:
        full_signal = signal_tm.tensor_from_file(signal_tm, self)[0]

    for i, segment in enumerate(segments):
        sampling_rate = segment[0]
        init = segment[1]
        # Open-ended slice for the last segment: an end of -1 would drop
        # the final sample of the signal.
        end = None if i == last_segment else segments[i + 1][1]

        ecg_signal = full_signal[init:end]
        ecg_signal = nk.ecg_clean(ecg_signal, sampling_rate, clean_method)

        try:
            _, r_peaks = nk.ecg_peaks(ecg_signal, sampling_rate, r_method)
        except IndexError:
            continue
        if len(r_peaks["ECG_R_Peaks"]) < min_peaks:
            continue

        # Delineate twice: once with the library default method and once
        # with <wave_method>; the second result overrides shared keys.
        _, waves_peaks = nk.ecg_delineate(ecg_signal, r_peaks, sampling_rate)
        _, waves_peaks_2 = nk.ecg_delineate(
            ecg_signal,
            r_peaks,
            sampling_rate,
            wave_method,
        )
        waves_peaks.update(waves_peaks_2)

        # NOTE(review): the positions are appended as returned for each
        # segment, without adding <init> -- confirm whether consumers
        # expect segment-relative or signal-relative indices.
        for store, found in (
            (self.r_peaks, r_peaks),
            (self.waves_peaks, waves_peaks),
        ):
            for peak_type, positions in found.items():
                if peak_type in store:
                    store[peak_type] = np.append(store[peak_type], positions)
                else:
                    store[peak_type] = positions

    for store in (self.r_peaks, self.waves_peaks):
        for peak_type in store:
            store[peak_type] = list(store[peak_type])
def compute_feature(
    tm: TensorMap,
    hd5: h5py.File,
    visit: str,
    indices: List[int],
    feature: str,
    period: str,
    imputation_type: str = None,
    **kwargs,
):
    """
    Compute one feature of a signal over the samples selected by <indices>.

    Tensor maps whose name ends with "_timeseries" are read as a 2-row
    array (row 0: values, row 1: times); any other map is read as a plain
    array of values.

    :param tm: <TensorMap> Signal to read with <tm.tensor_from_file>.
    :param hd5: <h5py.File> File forwarded to <tm.tensor_from_file>.
    :param visit: <str> Forwarded to <tm.tensor_from_file> as <visits>.
    :param indices: <List[int]> Positions of the samples of the window. When
                    empty, the feature is NaN (before imputation).
    :param feature: <str> One of "raw", "mean_slope" (timeseries only),
                    "count", "mean", "std", "mean_crossing_rate" (values
                    only), "min", "max", "median", "first", "last" or
                    "<N>_last_values" (last N valid samples, left-padded
                    with NaN up to N).
    :param period: <str> Forwarded to <missing_imputation>. For timeseries
                   "min"/"max"/"median", "pre" picks the time of the latest
                   sample closest to the feature value, any other value the
                   earliest one.
    :param imputation_type: <str> Forwarded to <missing_imputation>.
    :return: <np.ndarray> The feature. For timeseries maps and "min", "max",
             "median", "first" or "last" the result is [value, time], or
             [NaN, NaN] when the value is missing.
    :raises KeyError: If <feature> is unknown or not available for the kind
                      of tensor map given.
    """
    is_timeseries = tm.name.endswith("_timeseries")
    if is_timeseries and feature in (
        "mean",
        "std",
        "count",
        "mean_crossing_rate",
    ):
        raise KeyError(
            f"To compute {feature} use signal_value, not signal_timeseries.",
        )
    if not is_timeseries and feature == "mean_slope":
        raise KeyError(
            f"To compute {feature} use signal_timeseries, not signal_value.",
        )

    reducers = {
        "min": np.min,
        "max": np.max,
        "median": np.median,
        "mean": np.mean,
        "std": np.std,
    }

    # Read the signal only once; it is reused for the time lookup below.
    data = None
    if len(indices) == 0:
        tensor = np.array([np.nan])
    else:
        data = tm.tensor_from_file(tm, hd5, visits=visit, **kwargs)[0]
        if feature == "raw":
            tensor = data[:, indices] if is_timeseries else data[indices]
        elif feature == "mean_slope":
            samples = data[:, indices]
            # Drop every column holding a NaN in either row.
            samples = samples[:, ~np.isnan(samples).any(axis=0)]
            if samples.shape[1] <= 1:
                # A slope needs at least two samples.
                tensor = np.array([np.nan])
            else:
                tensor = np.nanmean(np.diff(samples[0]) / np.diff(samples[1]))
        else:
            values = data[0, indices] if is_timeseries else data[indices]
            values = values[~np.isnan(values)]

            keep_last_values = feature.endswith("_last_values")
            if keep_last_values:
                number_of_samples = int(feature.split("_")[0])
                values = values[-number_of_samples:]
                missing = number_of_samples - values.size
                if missing > 0:
                    values = np.concatenate(
                        (np.full(missing, np.nan), values),
                    )

            if feature == "count":
                tensor = values.size
            elif values.size == 0:
                tensor = np.array([np.nan])
            elif keep_last_values:
                tensor = values
            elif feature == "last":
                tensor = values[-1]
            elif feature == "first":
                tensor = values[0]
            elif feature in reducers:
                tensor = reducers[feature](values)
            elif feature == "mean_crossing_rate":
                # Number of sign changes of the mean-centred signal.
                signs = np.sign(values - np.mean(values))
                tensor = int(np.count_nonzero(np.diff(signs)))
            else:
                raise KeyError(f"Unable to compute feature {feature}.")

    tensor = missing_imputation(
        tm=tm,
        hd5=hd5,
        visit=visit,
        indices=indices,
        period=period,
        tensor=tensor,
        imputation_type=imputation_type,
        **kwargs,
    )

    if is_timeseries and feature in ("min", "max", "median", "first", "last"):
        # Obtain the time where the feature is found
        if np.isnan(tensor).all():
            tensor = np.array([np.nan, np.nan])
        else:
            # Imputation may hand back a one-element array.
            value = np.asarray(tensor).ravel()[0]
            time = np.nan
            if data is not None:
                samples = data[0, indices]
                times = data[1, indices]
                # Only valid samples can hold the feature; argmin over NaNs
                # would return the position of the first NaN.
                valid = np.flatnonzero(~np.isnan(samples))
                if valid.size > 0:
                    if feature == "last":
                        position = valid[-1]
                    elif feature == "first":
                        position = valid[0]
                    else:
                        # Closest sample to the feature value. argmin keeps
                        # the first match, so for a pre-event period the
                        # array is reversed to keep the one closest to the
                        # event.
                        distance = np.abs(samples[valid] - value)
                        if period == "pre":
                            closest = valid.size - 1 - distance[::-1].argmin()
                        else:
                            closest = distance.argmin()
                        position = valid[closest]
                    time = times[position]
            tensor = np.array([value, time])

    return tensor if isinstance(tensor, np.ndarray) else np.array([tensor])