def tensor_from_file(tm, data):
    """One-hot encode the stored length of one lead for each selected ECG.

    The ``len`` attribute of the lead's dataset is prefixed with
    ``channel_prefix`` and compared case-insensitively with the keys of
    ``tm.channel_map``.  When no key matches, the ``channel_unknown`` channel
    is set instead.  An ECG whose lookup raises ``KeyError`` is logged at
    debug level and left as zeros.

    NOTE(review): ``lead``, ``channel_prefix`` and ``channel_unknown`` are not
    defined in this function -- presumably bound by an enclosing scope;
    confirm against the surrounding file.

    Args:
        tm: tensor map providing ``time_series_filter`` and ``channel_map``.
        data: patient data, indexed by the path built by ``make_hd5_path``.

    Returns:
        Float array with the shape reported by ``is_dynamic_shape``.
    """
    dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(dates))
    tensor = np.zeros(shape, dtype=float)

    for row, ecg_date in enumerate(dates):
        hd5_path = make_hd5_path(tm, ecg_date, lead)
        try:
            raw_len = data[hd5_path].attrs["len"]
            label = f"{channel_prefix}{raw_len}".lower()

            # Fall back to the "unknown" channel unless a key matches.
            target = channel_unknown
            for channel in tm.channel_map:
                if label == channel.lower():
                    target = channel
                    break

            col = tm.channel_map[target]
            tensor[(row, col) if dynamic else (col,)] = 1.0
        except KeyError:
            logging.debug(
                f"Could not get voltage length for lead {lead} from ECG on"
                f" {ecg_date} in {data.id}",
            )
    return tensor
def sampling_frequency_from_file(tm, data):
    """Compute a per-ECG sampling frequency from a lead's stored length.

    The frequency is the lead dataset's ``len`` attribute divided by
    ``duration``.  For a categorical tensor map the value is prefixed with
    ``channel_prefix`` and matched case-insensitively against the keys of
    ``tm.channel_map`` (falling back to ``channel_unknown``); otherwise the
    number itself is stored at the ECG's index.

    ECGs for which the lead or its ``len`` attribute is missing are logged at
    debug level and keep their initial value (zeros for categorical maps,
    ``fill`` otherwise).

    NOTE(review): ``lead``, ``duration``, ``fill``, ``channel_prefix`` and
    ``channel_unknown`` are not defined in this function -- presumably bound
    by an enclosing scope; confirm against the surrounding file.

    Args:
        tm: tensor map providing ``time_series_filter``, ``interpretation``
            and ``channel_map``.
        data: patient data, indexed by the path built by ``make_hd5_path``.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``.
    """
    ecg_dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(ecg_dates))
    categorical = tm.interpretation == Interpretation.CATEGORICAL
    if categorical:
        tensor = np.zeros(shape, dtype=np.float32)
    else:
        tensor = np.full(shape, fill, dtype=np.float32)

    for i, ecg_date in enumerate(ecg_dates):
        path = make_hd5_path(tm, ecg_date, lead)
        try:
            # The lookup lives inside the try so that a missing lead or a
            # missing "len" attribute is logged and skipped rather than
            # aborting the whole tensor, as the handler below intends.
            lead_length = data[path].attrs["len"]
            sampling_frequency = lead_length / duration

            if not categorical:
                tensor[i] = sampling_frequency
                continue

            label = f"{channel_prefix}{sampling_frequency}".lower()
            # Fall back to the "unknown" channel unless a key matches.
            target = channel_unknown
            for cm in tm.channel_map:
                if label == cm.lower():
                    target = cm
                    break
            col = tm.channel_map[target]
            tensor[(i, col) if dynamic else (col,)] = 1.0
        except (KeyError, ValueError):
            logging.debug(
                f"Could not calculate sampling frequency from ECG on {ecg_date} in"
                f" {data.id}",
            )
    return tensor
def tensor_from_file(tm, data):
    """One-hot encode each ECG by the first channel whose terms match its text.

    For every selected ECG the values stored under each entry of ``keys``
    (when the path exists in ``data``) are concatenated and lower-cased.
    Channels are then tried in ascending channel-index order; the first
    channel listed in ``channel_terms`` with a term that ``re.search`` finds
    in the text is set.  If none matches, ``not_found_channel`` is set.

    NOTE(review): ``keys``, ``channel_terms`` and ``not_found_channel`` are
    not defined in this function -- presumably bound by an enclosing scope;
    confirm against the surrounding file.

    Args:
        tm: tensor map providing ``time_series_filter`` and ``channel_map``.
        data: patient data supporting ``in`` and indexing by path.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``.
    """
    dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(dates))
    tensor = np.zeros(shape, dtype=np.float32)

    for ecg_idx, ecg_date in enumerate(dates):
        text = ""
        for key in keys:
            path = make_hd5_path(tm, ecg_date, key)
            if path in data:
                text += data[path][()]
        text = text.lower()

        by_index = sorted(tm.channel_map.items(), key=lambda item: item[1])
        for channel, channel_idx in by_index:
            if channel not in channel_terms:
                continue
            term_found = any(
                re.search(term.lower(), text) is not None
                for term in channel_terms[channel]
            )
            if term_found:
                hit_idx = channel_idx
                break
        else:
            # No channel matched: use the designated "not found" channel.
            hit_idx = tm.channel_map[not_found_channel]

        tensor[(ecg_idx, hit_idx) if dynamic else (hit_idx,)] = 1
    return tensor
def get_ecg_datetime(tm, data):
    """Return the selected ECG dates as an array of fixed-width strings.

    Args:
        tm: tensor map providing ``time_series_filter``.
        data: patient data passed to the time-series filter.

    Returns:
        Array of dtype ``<U19`` with the shape reported by
        ``is_dynamic_shape``, holding one date per index; unfilled positions
        stay as empty strings.
    """
    dates = tm.time_series_filter(data)
    # Only the shape is needed here; the dynamic flag is unused.
    _, shape = is_dynamic_shape(tm, len(dates))
    tensor = np.full(shape, "", dtype=f"<U19")
    for row, date_value in enumerate(dates):
        tensor[row] = date_value
    return tensor
def tff_any(tm: TensorMap, data: PatientData) -> np.ndarray:
    """Encode, per selected surgery, whether any tracked outcome is set.

    The rows of ``data[STS_PREFIX]`` selected by the filter's index are
    reduced over the ``sts_outcome_keys`` columns with ``any``, cast to int
    and passed one by one through ``binarize("any", ...)``.

    NOTE(review): ``sts_outcome_keys`` is not defined in this function --
    presumably module- or closure-level; confirm.

    Args:
        tm: tensor map providing ``time_series_filter``.
        data: patient data whose ``STS_PREFIX`` entry supports ``.loc``.

    Returns:
        The stacked encodings, or only the first one when
        ``is_dynamic_shape(tm)`` is falsy.
    """
    surgeries = tm.time_series_filter(data)
    outcomes = data[STS_PREFIX].loc[surgeries.index, sts_outcome_keys].to_numpy()
    any_outcome = outcomes.any(axis=1).astype(int)
    encoded = np.array([binarize("any", flag) for flag in any_outcome])
    if is_dynamic_shape(tm):
        return encoded
    return encoded[0]
def tensor_from_file(tm: TensorMap, data: PatientData) -> np.ndarray:
    """One-hot encode the ``key`` column for each selected surgery.

    NOTE(review): ``key`` is not defined in this function -- presumably bound
    by an enclosing scope; confirm.

    Args:
        tm: tensor map providing ``time_series_filter``, ``channel_map`` and
            ``name``.
        data: patient data whose ``STS_PREFIX`` entry supports ``.loc``.

    Returns:
        The stacked ``one_hot`` encodings, or only the first one when
        ``is_dynamic_shape(tm)`` is falsy.

    Raises:
        ValueError: if ``tm.channel_map`` is ``None``.
    """
    surgeries = tm.time_series_filter(data)
    values = data[STS_PREFIX].loc[surgeries.index, key].to_numpy()

    channel_map = tm.channel_map
    if channel_map is None:
        raise ValueError(f"{tm.name} channel map is None")

    encoded = np.array([one_hot(channel_map, value) for value in values])
    return encoded if is_dynamic_shape(tm) else encoded[0]
def tensor_from_file(tm: TensorMap, data: PatientData) -> np.ndarray:
    """Binarize the ``key`` column for each selected surgery.

    Each value is passed to ``binarize`` together with ``negative_value`` and
    ``positive_value``.

    NOTE(review): ``key``, ``negative_value`` and ``positive_value`` are not
    defined in this function -- presumably bound by an enclosing scope;
    confirm.

    Args:
        tm: tensor map providing ``time_series_filter``.
        data: patient data whose ``STS_PREFIX`` entry supports ``.loc``.

    Returns:
        The stacked encodings, or only the first one when
        ``is_dynamic_shape(tm)`` is falsy.
    """
    surgeries = tm.time_series_filter(data)
    values = data[STS_PREFIX].loc[surgeries.index, key].to_numpy()
    encoded = np.array(
        [binarize(key, value, negative_value, positive_value) for value in values],
    )
    if is_dynamic_shape(tm):
        return encoded
    return encoded[0]
def voltage_zeros(tm, data):
    """Count the zero-valued samples of every channel for each selected ECG.

    Args:
        tm: tensor map providing ``time_series_filter`` and ``channel_map``
            (keys are used as the value key of the data path).
        data: patient data, indexed by the path built by ``make_hd5_path``.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``;
        each channel position holds the number of samples equal to zero.
    """
    dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(dates))
    tensor = np.zeros(shape, dtype=np.float32)

    for row, ecg_date in enumerate(dates):
        for lead_name in tm.channel_map:
            voltage = data[make_hd5_path(tm, ecg_date, lead_name)][()]
            col = tm.channel_map[lead_name]
            index = (row, col) if dynamic else (col,)
            tensor[index] = np.count_nonzero(voltage == 0)
    return tensor
def ecg_acquisition_year(tm, data):
    """Return the acquisition year of each selected ECG.

    The value stored under ``"acquisitiondate"`` is parsed with
    ``_ecg_str2date`` and its ``year`` is stored.  ECGs whose lookup raises
    ``KeyError`` keep the initial value 0.

    Args:
        tm: tensor map providing ``time_series_filter``.
        data: patient data, indexed by the path built by ``make_hd5_path``.

    Returns:
        Integer array with the shape reported by ``is_dynamic_shape``.
    """
    dates = tm.time_series_filter(data)
    # Only the shape is needed here; the dynamic flag is unused.
    _, shape = is_dynamic_shape(tm, len(dates))
    tensor = np.zeros(shape, dtype=int)

    for row, ecg_date in enumerate(dates):
        date_path = make_hd5_path(tm, ecg_date, "acquisitiondate")
        try:
            acquired = data[date_path][()]
            tensor[row] = _ecg_str2date(acquired).year
        except KeyError:
            # Best effort: leave the year as 0 when the date is unavailable.
            continue
    return tensor
def tensor_from_file(tm, data):
    """Read the ``volt_attr`` attribute of every channel for each ECG.

    Channels whose lookup raises ``KeyError`` keep the initial value 0.

    NOTE(review): ``volt_attr`` is not defined in this function -- presumably
    bound by an enclosing scope; confirm.

    Args:
        tm: tensor map providing ``time_series_filter`` and ``channel_map``
            (keys are used as the value key of the data path).
        data: patient data, indexed by the path built by ``make_hd5_path``.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``.
    """
    dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(dates))
    tensor = np.zeros(shape, dtype=np.float32)

    for row, ecg_date in enumerate(dates):
        for channel in tm.channel_map:
            try:
                attr_path = make_hd5_path(tm, ecg_date, channel)
                col = tm.channel_map[channel]
                index = (row, col) if dynamic else (col,)
                tensor[index] = data[attr_path].attrs[volt_attr]
            except KeyError:
                # Best effort: a missing lead or attribute stays at 0.
                continue
    return tensor
def tensor_from_file(tm, data):
    """Build the voltage tensor of every channel for each selected ECG.

    For each ECG and each key of ``tm.channel_map`` the stored voltage is
    read, resampled with ``_resample_voltage`` to the tensor's voltage length
    and written into the channel's slot.  The sample rate passed to the
    resampler is read from ``"waveform_samplebase"`` and falls back to 250
    when it cannot be read or converted to ``float``.

    When ``no_pacemaker`` is set, ``pacemaker_tm`` is evaluated with this
    tensor map's time-series filter (note: this assigns to
    ``pacemaker_tm.time_series_filter``) and ECGs whose pacemaker tensor has
    a 1 in column 1 are skipped, i.e. left as zeros.  When ``conv_2d`` is
    set, the last axis of the reported shape is dropped while filling and a
    trailing axis of size 1 is added to the result.

    Leads that are missing, have the wrong length under ``exact_length``, or
    otherwise raise ``KeyError``/``AssertionError``/``ValueError`` are logged
    at debug level and left as zeros.

    NOTE(review): ``no_pacemaker``, ``pacemaker_tm``, ``conv_2d`` and
    ``exact_length`` are not defined in this function -- presumably bound by
    an enclosing scope; confirm against the surrounding file.

    Args:
        tm: tensor map providing ``time_series_filter`` and ``channel_map``.
        data: patient data, indexed by the path built by ``make_hd5_path``.

    Returns:
        ``float32`` array of voltages.
    """
    ecg_dates = tm.time_series_filter(data)
    if no_pacemaker:
        pacemaker_tm.time_series_filter = tm.time_series_filter
        pacemaker_tensor = pacemaker_tm.tensor_from_file(pacemaker_tm, data)

    dynamic, shape = is_dynamic_shape(tm, len(ecg_dates))
    if conv_2d:
        shape = shape[:-1]
    voltage_length = shape[1] if dynamic else shape[0]
    tensor = np.zeros(shape, dtype=np.float32)

    for i, ecg_date in enumerate(ecg_dates):
        if no_pacemaker and pacemaker_tensor[i, 1] == 1:
            continue
        for cm in tm.channel_map:
            try:
                path = make_hd5_path(tm=tm, date_key=ecg_date, value_key=cm)
                voltage = data[path][()]
                path_waveform_samplebase = make_hd5_path(
                    tm=tm,
                    date_key=ecg_date,
                    value_key="waveform_samplebase",
                )
                try:
                    fs = float(data[path_waveform_samplebase][()])
                except Exception:
                    # Best-effort fallback. ``Exception`` rather than a bare
                    # ``except`` so KeyboardInterrupt/SystemExit propagate.
                    fs = 250
                # Explicit check instead of ``assert`` so the length
                # requirement is still enforced under ``python -O``.
                if exact_length and len(voltage) != voltage_length:
                    raise ValueError(
                        f"Lead {cm} has {len(voltage)} samples, expected"
                        f" {voltage_length}",
                    )
                voltage = _resample_voltage(
                    voltage=voltage,
                    desired_samples=voltage_length,
                    fs=fs,
                )
                slices = (
                    (i, ..., tm.channel_map[cm])
                    if dynamic
                    else (..., tm.channel_map[cm])
                )
                tensor[slices] = voltage
            except (KeyError, AssertionError, ValueError):
                logging.debug(
                    f"Could not get voltage for lead {cm} with {voltage_length}"
                    f" samples in {data.id}",
                )

    if conv_2d:
        tensor = tensor[..., None]
    return tensor
def get_ecg_tensor(tm, data):
    """Read the ``key`` value of each selected ECG into a tensor.

    The initial tensor depends on ``tm.interpretation``: empty strings for
    LANGUAGE, ``fill`` for CONTINUOUS and zeros for CATEGORICAL.  Categorical
    values are prefixed with ``channel_prefix`` and matched
    case-insensitively against the keys of ``tm.channel_map`` (falling back
    to ``channel_unknown``); other values are stored at the ECG's index.
    ECGs that raise ``KeyError`` or ``ValueError`` are logged at debug level
    and keep their initial value.

    NOTE(review): ``key``, ``fill``, ``channel_prefix`` and
    ``channel_unknown`` are not defined in this function -- presumably bound
    by an enclosing scope; confirm against the surrounding file.

    Args:
        tm: tensor map providing ``time_series_filter``, ``interpretation``,
            ``channel_map`` and ``name``.
        data: patient data, indexed by the path built by ``make_hd5_path``.

    Returns:
        Array with the shape reported by ``is_dynamic_shape``; converted to
        ``str`` dtype for LANGUAGE tensor maps.

    Raises:
        NotImplementedError: for any other interpretation.
    """
    dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(dates))

    interpretation = tm.interpretation
    is_language = interpretation == Interpretation.LANGUAGE
    is_categorical = interpretation == Interpretation.CATEGORICAL

    if is_language:
        tensor = np.full(shape, "", dtype=object)
    elif interpretation == Interpretation.CONTINUOUS:
        if fill == 0:
            tensor = np.zeros(shape, dtype=float)
        else:
            tensor = np.full(shape, fill, dtype=float)
    elif is_categorical:
        tensor = np.zeros(shape, dtype=float)
    else:
        raise NotImplementedError(
            f"unsupported interpretation for ecg tmaps: {tm.interpretation}",
        )

    for row, ecg_date in enumerate(dates):
        value_path = make_hd5_path(tm, ecg_date, key)
        try:
            value = data[value_path][()]
            if not is_categorical:
                tensor[row] = value
                continue

            label = f"{channel_prefix}{value}".lower()
            # Fall back to the "unknown" channel unless a key matches.
            target = channel_unknown
            for channel in tm.channel_map:
                if label == channel.lower():
                    target = channel
                    break
            col = tm.channel_map[target]
            tensor[(row, col) if dynamic else (col,)] = 1.0
        except (KeyError, ValueError):
            logging.debug(
                f"Could not obtain tensor {tm.name} from ECG on {ecg_date} in"
                f" {data.id}",
            )

    if is_language:
        tensor = tensor.astype(str)
    return tensor
def voltage_stat(tm, data):
    """Compute summary statistics over all rest leads of each selected ECG.

    The voltages of every lead in ``ECG_REST_LEADS_ALL`` are stacked into one
    array and its mean, standard deviation, minimum, maximum and median are
    written to the ``"mean"``, ``"std"``, ``"min"``, ``"max"`` and
    ``"median"`` channels.  A ``KeyError`` (missing lead or channel) is
    logged as a warning; values already written for that ECG are kept.

    Args:
        tm: tensor map providing ``time_series_filter`` and ``channel_map``.
        data: patient data, indexed by the path built by ``make_hd5_path``.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``.
    """
    dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(dates))
    tensor = np.zeros(shape, dtype=np.float32)

    # Channel name -> reducer, applied in this order.
    reducers = (
        ("mean", np.mean),
        ("std", np.std),
        ("min", np.min),
        ("max", np.max),
        ("median", np.median),
    )

    for row, ecg_date in enumerate(dates):
        try:
            voltages = np.array(
                [
                    data[make_hd5_path(tm, ecg_date, lead)][()]
                    for lead in ECG_REST_LEADS_ALL
                ],
            )
            for stat, reduce_fn in reducers:
                # Compute the statistic before resolving the channel index.
                result = reduce_fn(voltages)
                col = tm.channel_map[stat]
                tensor[(row, col) if dynamic else (col,)] = result
        except KeyError:
            logging.warning(
                f"Could not get voltage stats for ECG at {data.id}")
    return tensor
def ecg_bmi(tm, data):
    """Compute the body-mass index recorded with each selected ECG.

    Weight (``"weightlbs"``) and height (``"heightin"``) are read as floats,
    checked against the ``MIN_*``/``MAX_*`` bounds, converted with the
    factors 0.454 and 0.0254, and combined as weight / height**2.  ECGs with
    missing, unparsable or out-of-range values keep the initial value 0.

    Args:
        tm: tensor map providing ``time_series_filter``.
        data: patient data, indexed by the path built by ``make_hd5_path``.

    Returns:
        Float array with the shape reported by ``is_dynamic_shape``.
    """
    dates = tm.time_series_filter(data)
    # Only the shape is needed here; the dynamic flag is unused.
    _, shape = is_dynamic_shape(tm, len(dates))
    tensor = np.zeros(shape, dtype=float)

    for row, ecg_date in enumerate(dates):
        try:
            weight_lbs = float(data[make_hd5_path(tm, ecg_date, "weightlbs")][()])
            height_in = float(data[make_hd5_path(tm, ecg_date, "heightin")][()])

            height_invalid = height_in < MIN_HEIGHT_IN or height_in > MAX_HEIGHT_IN
            weight_invalid = weight_lbs < MIN_WEIGHT_LBS or weight_lbs > MAX_WEIGHT_LBS
            if height_invalid or weight_invalid:
                raise ValueError(f"Height/Weight outside valid range")

            height_m = 0.0254 * height_in
            weight_kg = 0.454 * weight_lbs
            tensor[row] = weight_kg / (height_m**2)
        except (KeyError, ZeroDivisionError, ValueError):
            # Best effort: leave this ECG's BMI at 0.
            continue
    return tensor
def get_ecg_age_from_hd5(tm, data):
    """Return the patient's age at each selected ECG.

    The age is the difference between ``"acquisitiondate"`` and
    ``"dateofbirth"`` (both parsed with ``_ecg_str2date``) in days, divided
    by ``YEAR_DAYS``.  If that raises ``KeyError`` the stored
    ``"patientage"`` is used instead; if that is missing too, a debug message
    is logged and the initial value -1 is kept.  Iteration stops once the
    index reaches the size of the tensor's first axis.

    Args:
        tm: tensor map providing ``time_series_filter``.
        data: patient data, indexed by the path built by ``make_hd5_path``.

    Returns:
        Float array with the shape reported by ``is_dynamic_shape``.
    """
    dates = tm.time_series_filter(data)
    # Only the shape is needed here; the dynamic flag is unused.
    _, shape = is_dynamic_shape(tm, len(dates))
    tensor = np.full(shape, fill_value=-1, dtype=float)

    for row, ecg_date in enumerate(dates):
        if row >= shape[0]:
            break
        try:
            birthday = data[make_hd5_path(tm, ecg_date, "dateofbirth")][()]
            acquisition = data[make_hd5_path(tm, ecg_date, "acquisitiondate")][()]
            lifetime = _ecg_str2date(acquisition) - _ecg_str2date(birthday)
            tensor[row] = lifetime.days / YEAR_DAYS
        except KeyError:
            # Fall back to the age stored with the ECG, if any.
            try:
                tensor[row] = data[make_hd5_path(tm, ecg_date, "patientage")][()]
            except KeyError:
                logging.debug(
                    f"Could not get patient date of birth or age from ECG on {ecg_date}"
                    f" in {data.id}",
                )
    return tensor
def tensor_from_file(tm: TensorMap, data: PatientData) -> np.ndarray:
    """Return the ``key`` column for each selected echo.

    NOTE(review): ``key`` is not defined in this function -- presumably bound
    by an enclosing scope; confirm.

    Args:
        tm: tensor map providing ``time_series_filter``.
        data: patient data whose ``ECHO_PREFIX`` entry supports ``.loc``.

    Returns:
        The selected values as an array; when ``is_dynamic_shape(tm)`` is
        truthy a trailing axis of size 1 is added.
    """
    echoes = tm.time_series_filter(data)
    values = data[ECHO_PREFIX].loc[echoes.index, key].to_numpy()
    if not is_dynamic_shape(tm):
        return values
    return values[:, None]