Пример #1
0
def create_alarm_tmap(tm_name: str, alarm_name: str) -> Optional[TensorMap]:
    """
    Build the tensor map for one field of the alarm `alarm_name`.

    Recognized names have the form "<alarm_name>_<suffix>":
        init_date -> EVENT map over the alarm's start_date
        duration  -> CONTINUOUS map over the alarm's duration
        level     -> CONTINUOUS map reading the "level" attribute

    :param tm_name: name of the requested tensor map
    :param alarm_name: alarm whose data lives under bedmaster/*/alarms/
    :return: the matching TensorMap, or None if tm_name is not recognized
    """
    if tm_name == f"{alarm_name}_init_date":
        return TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_alarm_array_tensor_from_file(),
            path_prefix=f"bedmaster/*/alarms/{alarm_name}/start_date",
        )

    if tm_name == f"{alarm_name}_duration":
        return TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_alarm_array_tensor_from_file(),
            path_prefix=f"bedmaster/*/alarms/{alarm_name}/duration",
        )

    if tm_name == f"{alarm_name}_level":
        return TensorMap(
            name=tm_name,
            shape=(None,),  # type: ignore
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_alarm_attribute_tensor_from_file("level"),
            path_prefix=f"bedmaster/*/alarms/{alarm_name}",
        )

    # Not an alarm tensor map name this factory knows about
    return None
Пример #2
0
def create_event_department_tmap(tm_name: str, signal_name: str,
                                 data_type: str):
    """
    Build the department tensor map for an EDW signal.

    Recognized names are "<name>_departments" and
    "<name>_departments_with_bm", where <name> is `signal_name` with every
    "|_" substring removed. The second form passes True to
    make_event_department_tensor_from_file; the first passes no argument.

    :param tm_name: name of the requested tensor map
    :param signal_name: signal whose start_date is read
    :param data_type: path component between "edw/*/" and the signal name
    :return: the matching TensorMap, or None if tm_name is not recognized
    """
    base = signal_name.replace("|_", "")

    # Positional arguments forwarded to the tensor_from_file factory
    if tm_name == f"{base}_departments":
        tff_args = ()
    elif tm_name == f"{base}_departments_with_bm":
        tff_args = (True,)
    else:
        return None

    departments = {
        "mgh blake 8 card sicu": 0,
        "mgh ellison 8 cardsurg": 1,
        "mgh ellison 9 med\\ccu": 2,
        "mgh ellison 10 stp dwn": 3,
        "mgh ellison11 card\\int": 4,
        "other": 5,
    }
    return TensorMap(
        name=tm_name,
        interpretation=Interpretation.CONTINUOUS,
        tensor_from_file=make_event_department_tensor_from_file(*tff_args),
        path_prefix=f"edw/*/{data_type}/{signal_name}/start_date",
        channel_map=departments,
    )
Пример #3
0
def create_ecg_feature_tmap(tm_name: str):
    """
    Build the tensor map for an ECG peak or ECG feature.

    Two name formats are recognized, where <lead> is one of i, ii, iii, v:
        <peak>_<lead>
            EVENT map over bedmaster/*/ecg_features/<lead>/ecg_<peak>
        <feature>_<lead>_<timeseries|value|time>
            timeseries -> TIMESERIES map over the feature, built with the
                          reference peak "<lead>_<ref_peak>"
            value      -> CONTINUOUS map over the feature
            time       -> EVENT map over the feature's reference peak

    The reference peak is chosen from the start of the feature name:
    r/q/pr/s -> r_peak, other p -> p_peak, t -> t_peak.

    :param tm_name: name of the requested tensor map
    :return: the matching TensorMap, or None if tm_name is not recognized.
             This includes "timeseries"/"time" names whose feature has no
             known reference peak, which previously raised UnboundLocalError.
    """
    peak_match = re.findall(r"(.*)_(i|ii|iii|v)$", tm_name)
    if peak_match:
        peak_name, lead = peak_match[0]
        return TensorMap(
            name=tm_name,
            shape=(None, None),
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_ecg_peak_tensor_from_file(lead),
            path_prefix=f"bedmaster/*/ecg_features/{lead}/ecg_{peak_name}",
        )

    feature_match = re.findall(
        r"(.*)_(i|ii|iii|v)_(timeseries|value|time)$",
        tm_name,
    )
    if not feature_match:
        return None
    feature_name, lead, tm_type = feature_match[0]

    # The "value" map does not depend on a reference peak
    if tm_type == "value":
        return TensorMap(
            name=tm_name,
            shape=(None, None),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_ecg_feature_tensor_from_file(),
            path_prefix=f"bedmaster/*/ecg_features/{lead}/{feature_name}",
        )

    # "pr" must be tested together with r/q/s, before the plain "p" test,
    # so that pr-features are referenced to the r peak
    if feature_name.startswith(("r", "q", "pr", "s")):
        ref_peak = "r_peak"
    elif feature_name.startswith("p"):
        ref_peak = "p_peak"
    elif feature_name.startswith("t"):
        ref_peak = "t_peak"
    else:
        # No reference peak is defined for this feature, so the name is
        # treated like any other unrecognized tensor map name
        return None

    if tm_type == "timeseries":
        return TensorMap(
            name=tm_name,
            shape=(None, None),
            interpretation=Interpretation.TIMESERIES,
            tensor_from_file=make_ecg_feature_tensor_from_file(
                f"{lead}_{ref_peak}",
            ),
            path_prefix=f"bedmaster/*/ecg_features/{lead}/{feature_name}",
        )

    # Only tm_type == "time" remains
    return TensorMap(
        name=tm_name,
        shape=(None, None),
        interpretation=Interpretation.EVENT,
        tensor_from_file=make_ecg_peak_tensor_from_file(lead),
        path_prefix=f"bedmaster/*/ecg_features/{lead}/ecg_{ref_peak}",
    )
Пример #4
0
def create_around_explore_tmap(tmap_name: str) -> Optional[TensorMap]:
    """
    Build the "explore" variant of an around-event window tensor map.

    The name must have the form
        <a>_<N>_hrs_<pre|post>_<b>_<M>_hrs_window_explore
    The underlying tensor map name is obtained by removing "_explore"; its
    path_prefix (via create_around_tmap) is reused for the new map.

    :param tmap_name: name of the requested tensor map
    :return: a CONTINUOUS TensorMap with min/max/mean/std/first/last/count
             channels, or None if tmap_name does not match the format
    """
    explore_pattern = re.compile(
        r"^(.*)_(\d+)_hrs_(pre|post)_(.*)_(\d+)_hrs_window_explore$", )
    if not explore_pattern.findall(tmap_name):
        return None

    base_name = tmap_name.replace("_explore", "")
    tff = make_around_event_explore_tensor_from_file(base_name)

    statistics = ["min", "max", "mean", "std", "first", "last", "count"]
    channels = {statistic: idx for idx, statistic in enumerate(statistics)}

    base_tmap = create_around_tmap(base_name)
    return TensorMap(
        name=tmap_name,
        tensor_from_file=tff,
        channel_map=channels,
        path_prefix=base_tmap.path_prefix,
        interpretation=Interpretation.CONTINUOUS,
    )
Пример #5
0
def create_event_tmap(tm_name: str, event_name: str, event_type: str):
    """
    Build the tensor map for one view of an EDW event.

    Recognized names have the form "<name>_<suffix>", where <name> is
    `event_name` with every "|_" substring removed:
        start_date / end_date -> EVENT map over that date
        double / single       -> CATEGORICAL outcome map of shape (2,) or
                                 (1,), built from the event's start_date and
                                 the "<name>_first_visit" visit tensor map

    :param tm_name: name of the requested tensor map
    :param event_name: event whose data is read
    :param event_type: path component between "edw/*/" and the event name
    :return: the matching TensorMap, or None if tm_name is not recognized
    """
    label = event_name.replace("|_", "")

    if tm_name == f"{label}_start_date":
        return TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_event_tensor_from_file(),
            path_prefix=f"edw/*/{event_type}/{event_name}/start_date",
        )

    if tm_name == f"{label}_end_date":
        return TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_event_tensor_from_file(),
            path_prefix=f"edw/*/{event_type}/{event_name}/end_date",
        )

    # The two outcome maps differ only in sample count and the `double` flag
    samples_by_name = {f"{label}_double": 2, f"{label}_single": 1}
    if tm_name not in samples_by_name:
        return None

    samples = samples_by_name[tm_name]
    outcome_tff = make_event_outcome_tensor_from_file(
        visit_tm=get_visit_tmap(f"{label}_first_visit"),
        double=(samples == 2),
    )
    return TensorMap(
        name=tm_name,
        shape=(samples,),
        interpretation=Interpretation.CATEGORICAL,
        tensor_from_file=outcome_tff,
        channel_map={f"no_{label}": 0, label: 1},
        path_prefix=f"edw/*/{event_type}/{event_name}/start_date",
        time_series_limit=2,
    )
Пример #6
0
def create_med_tmap(tm_name: str, med_name: str):
    """
    Build the tensor map for one field of the medication `med_name`.

    Recognized names have the form "<med_name>_<suffix>":
        timeseries -> TIMESERIES map over edw/*/med/<med_name>
        dose       -> CONTINUOUS map over the medication's dose
        time       -> EVENT map over the medication's start_date
        units      -> LANGUAGE map reading the "units" attribute
        route      -> LANGUAGE map reading the "route" attribute

    :param tm_name: name of the requested tensor map
    :param med_name: medication whose data lives under edw/*/med/
    :return: the matching TensorMap, or None if tm_name is not recognized
    """
    if tm_name == f"{med_name}_timeseries":
        return TensorMap(
            name=tm_name,
            shape=(None, 2, None),  # type: ignore
            interpretation=Interpretation.TIMESERIES,
            tensor_from_file=make_med_array_tensor_from_file(),
            path_prefix=f"edw/*/med/{med_name}",
        )

    if tm_name == f"{med_name}_dose":
        return TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_med_array_tensor_from_file(),
            path_prefix=f"edw/*/med/{med_name}/dose",
        )

    if tm_name == f"{med_name}_time":
        return TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_med_array_tensor_from_file(),
            path_prefix=f"edw/*/med/{med_name}/start_date",
        )

    if tm_name == f"{med_name}_units":
        return TensorMap(
            name=tm_name,
            shape=(None,),  # type: ignore
            interpretation=Interpretation.LANGUAGE,
            tensor_from_file=make_med_attribute_tensor_from_file("units"),
            path_prefix=f"edw/*/med/{med_name}",
        )

    if tm_name == f"{med_name}_route":
        return TensorMap(
            name=tm_name,
            shape=(None,),  # type: ignore
            interpretation=Interpretation.LANGUAGE,
            tensor_from_file=make_med_attribute_tensor_from_file("route"),
            path_prefix=f"edw/*/med/{med_name}",
        )

    # Not a medication tensor map name this factory knows about
    return None
Пример #7
0
def create_list_signals_tmap(sig_name: str, sig_type: str, root: str):
    """
    Build a LANGUAGE tensor map named `sig_name` over "<root>/*/<sig_type>".

    :param sig_name: name given to the tensor map
    :param sig_type: signal type, passed to make_list_signal_tensor_from_file
                     and used as the last path component
    :param root: first component of the path prefix
    :return: the new TensorMap
    """
    list_tff = make_list_signal_tensor_from_file(sig_type)
    prefix = f"{root}/*/{sig_type}"
    return TensorMap(
        name=sig_name,
        shape=(None, None),
        interpretation=Interpretation.LANGUAGE,
        tensor_from_file=list_tff,
        path_prefix=prefix,
    )
Пример #8
0
def create_timeseries_tmap(key: str) -> TensorMap:
    """
    Build a TIMESERIES tensor map for `key` under EDW_PREFIX.

    :param key: tensor map name, also passed to make_static_tensor_from_file
    :return: the new TensorMap, with shape (None,) and time_series_limit 0
    """
    settings = dict(
        name=key,
        shape=(None,),
        interpretation=Interpretation.TIMESERIES,
        tensor_from_file=make_static_tensor_from_file(key),
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
    )
    return TensorMap(**settings)
Пример #9
0
def create_language_tmap(key: str) -> TensorMap:
    """
    Build a LANGUAGE tensor map for `key` under EDW_PREFIX.

    :param key: tensor map name, also passed to make_static_tensor_from_file
    :return: the new TensorMap, with shape (1,) and time_series_limit 0
    """
    static_tff = make_static_tensor_from_file(key)
    return TensorMap(
        name=key,
        shape=(1,),
        interpretation=Interpretation.LANGUAGE,
        tensor_from_file=static_tff,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
    )
Пример #10
0
def create_arrest_tmap(tm_name: str):
    """
    Build one of the "arrest" tensor maps, which combine the code_start and
    rapid_response_start sub-events.

    Recognized names:
        arrest_start_date -> EVENT map over the sub-events' start dates
        arrest_double     -> CATEGORICAL outcome map of shape (2,)
        arrest_single     -> CATEGORICAL outcome map of shape (1,)

    :param tm_name: name of the requested tensor map
    :return: the matching TensorMap, or None if tm_name is not recognized
    """
    subevents = ["code_start", "rapid_response_start"]

    if tm_name == "arrest_start_date":
        return TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_general_event_tensor_from_subevents(subevents),
            path_prefix="edw/*/events/{}/start_date",
        )

    if tm_name not in ("arrest_double", "arrest_single"):
        return None

    # The two outcome maps differ only in shape and the `double` flag
    is_double = tm_name == "arrest_double"
    outcome_tff = make_general_event_outcome_tensor_from_subevents(
        visit_tm=get_visit_tmap("arrest_first_visit"),
        events_names_list=subevents,
        double=is_double,
    )
    return TensorMap(
        name=tm_name,
        shape=(2,) if is_double else (1,),
        interpretation=Interpretation.CATEGORICAL,
        tensor_from_file=outcome_tff,
        channel_map={"no_arrest": 0, "arrest": 1},
        path_prefix="edw/*/events/{}/start_date",
        time_series_limit=2,
    )
Пример #11
0
def create_mrn_tmap():
    """
    Build the "mrn" LANGUAGE tensor map under EDW_PREFIX.

    :return: the new TensorMap, read with mrn_tensor_from_file and checked
             with validator_no_empty
    """
    settings = dict(
        name="mrn",
        shape=(1,),
        interpretation=Interpretation.LANGUAGE,
        tensor_from_file=mrn_tensor_from_file,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_empty,
    )
    return TensorMap(**settings)
Пример #12
0
def create_sex_double_tmap():
    """
    Build the "sex_double" CATEGORICAL tensor map under EDW_PREFIX.

    :return: the new TensorMap with male/female channels, a
             time_series_limit of 2 and validator_not_all_zero
    """
    sex_channels = {"male": 0, "female": 1}
    return TensorMap(
        name="sex_double",
        interpretation=Interpretation.CATEGORICAL,
        tensor_from_file=sex_double_tensor_from_file,
        channel_map=sex_channels,
        path_prefix=EDW_PREFIX,
        time_series_limit=2,
        validators=validator_not_all_zero,
    )
Пример #13
0
def create_continuous_tmap(key: str) -> TensorMap:
    """
    Build a CONTINUOUS tensor map for `key` under EDW_PREFIX.

    :param key: tensor map name, also passed to make_static_tensor_from_file
    :return: the new TensorMap, with shape (1,), time_series_limit 0 and
             validator_no_negative
    """
    static_tff = make_static_tensor_from_file(key)
    return TensorMap(
        name=key,
        shape=(1,),
        interpretation=Interpretation.CONTINUOUS,
        tensor_from_file=static_tff,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_negative,
    )
Пример #14
0
def create_age_tmap():
    """
    Build the "age" CONTINUOUS tensor map under EDW_PREFIX.

    :return: the new TensorMap, read with admin_age_tensor_from_file and
             checked with validator_no_negative
    """
    settings = dict(
        name="age",
        shape=(1,),
        interpretation=Interpretation.CONTINUOUS,
        tensor_from_file=admin_age_tensor_from_file,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_negative,
    )
    return TensorMap(**settings)
Пример #15
0
def create_categorical_tmap(key: str, channel_map: Dict[str, int]) -> TensorMap:
    """
    Build a CATEGORICAL tensor map for `key` under EDW_PREFIX.

    :param key: tensor map name, also passed to make_static_tensor_from_file
    :param channel_map: channel name -> index mapping given to the TensorMap
    :return: the new TensorMap, with time_series_limit 0 and
             validator_not_all_zero
    """
    static_tff = make_static_tensor_from_file(key)
    return TensorMap(
        name=key,
        interpretation=Interpretation.CATEGORICAL,
        tensor_from_file=static_tff,
        channel_map=channel_map,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_not_all_zero,
    )
Пример #16
0
def create_static_event_tmap(key: str) -> TensorMap:
    """
    Build an EVENT tensor map for `key` under EDW_PREFIX.

    :param key: tensor map name, also passed to make_static_tensor_from_file
    :return: the new TensorMap, with shape (1,), time_series_limit 0 and
             validator_no_empty
    """
    settings = dict(
        name=key,
        shape=(1,),
        interpretation=Interpretation.EVENT,
        tensor_from_file=make_static_tensor_from_file(key),
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_empty,
    )
    return TensorMap(**settings)
Пример #17
0
def create_first_visit_tmap(tm_name: str, signal_name: str, data_type: str):
    """
    Build the "<name>_first_visit" tensor map for an EDW signal, where
    <name> is `signal_name` with every "|_" substring removed.

    :param tm_name: name of the requested tensor map
    :param signal_name: signal whose start_date is read
    :param data_type: path component between "edw/*/" and the signal name
    :return: a CONTINUOUS TensorMap of shape (1,), or None if tm_name is not
             the first-visit name for this signal
    """
    base = signal_name.replace("|_", "")
    if tm_name != f"{base}_first_visit":
        return None

    return TensorMap(
        name=tm_name,
        shape=(1,),
        interpretation=Interpretation.CONTINUOUS,
        tensor_from_file=make_first_visit_tensor_from_file(),
        path_prefix=f"edw/*/{data_type}/{signal_name}/start_date",
    )
Пример #18
0
def create_arrest_first_visit_tmap(tm_name: str):
    """
    Build the "arrest_first_visit" tensor map from the code_start and
    rapid_response_start sub-events.

    :param tm_name: name of the requested tensor map
    :return: a CONTINUOUS TensorMap of shape (1,), or None if tm_name is not
             "arrest_first_visit"
    """
    if tm_name != "arrest_first_visit":
        return None

    subevents = ["code_start", "rapid_response_start"]
    first_visit_tff = make_general_first_visit_tensor_from_subset(subevents)
    return TensorMap(
        name=tm_name,
        shape=(1,),
        interpretation=Interpretation.CONTINUOUS,
        tensor_from_file=first_visit_tff,
        path_prefix="edw/*/events/{}/start_date",
    )
Пример #19
0
    def test_explore(
        default_arguments_explore: argparse.Namespace,
        tmpdir_factory,
        utils,
    ):
        """
        Run explore over freshly built HD5 files and compare each row of the
        exported tensors_union.csv with the values used to build the files.
        """
        tensor_dir = tmpdir_factory.mktemp("explore_tensors")
        default_arguments_explore.tensors = str(tensor_dir)

        # Work on a copy so the shared list of tensor maps is not modified
        tmaps = pytest.TMAPS_UP_TO_4D[:]
        scalar_tmap = TensorMap(
            "scalar",
            shape=(1, ),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=pytest.TFF,
        )
        tmaps.append(scalar_tmap)

        expected_values = utils.build_hd5s(
            tensor_dir,
            tmaps,
            n=pytest.N_TENSORS,
        )

        default_arguments_explore.num_workers = 3
        default_arguments_explore.tensor_maps_in = tmaps
        default_arguments_explore.explore_export_fpath = True
        explore(default_arguments_explore)

        union_csv = os.path.join(
            default_arguments_explore.output_folder,
            "tensors_union.csv",
        )
        results = pd.read_csv(union_csv)

        for _, result_row in results.iterrows():
            for tm in tmaps:
                expected = expected_values[(result_row["fpath"], tm)]
                if _tmap_requires_modification_for_explore(tm):
                    # Only check that a mean was produced for these maps
                    mean_header = continuous_explore_header(tm) + "_mean"
                    observed = getattr(result_row, mean_header)
                    assert not np.isnan(observed)
                elif tm.is_continuous:
                    observed = getattr(
                        result_row,
                        continuous_explore_header(tm),
                    )
                    assert observed == expected
                elif tm.is_categorical:
                    for channel, idx in tm.channel_map.items():
                        header = categorical_explore_header(tm, channel)
                        observed_channel = getattr(result_row, header)
                        assert observed_channel == expected[idx]
Пример #20
0
def create_bedmaster_signal_tmap(
    signal_name: str,
    signal_type: str,
    tmap_name: str,
    field: str,
    interpretation: Interpretation,
    dtype=None,
):
    """
    Build a tensor map over "bedmaster/*/<signal_type>/<signal_name>".

    :param signal_name: last component of the path prefix
    :param signal_type: path component between "bedmaster/*/" and the signal
    :param tmap_name: name given to the tensor map
    :param field: passed to make_bedmaster_signal_tensor_from_file
    :param interpretation: interpretation given to the tensor map
    :param dtype: passed to make_bedmaster_signal_tensor_from_file
    :return: the new TensorMap, with shape (None, None, None)
    """
    signal_tff = make_bedmaster_signal_tensor_from_file(field, dtype)
    signal_path = f"bedmaster/*/{signal_type}/{signal_name}"
    return TensorMap(
        name=tmap_name,
        shape=(None, None, None),
        interpretation=interpretation,
        tensor_from_file=signal_tff,
        path_prefix=signal_path,
    )
Пример #21
0
def create_bedmaster_signal_metadata_tmap(
    signal_name: str,
    signal_type: str,
    field: str,
    numeric: bool = True,
):
    """
    Build the "<signal_name>_<field>" metadata tensor map over
    "bedmaster/*/<signal_type>/<signal_name>".

    :param signal_name: signal the metadata belongs to
    :param signal_type: path component between "bedmaster/*/" and the signal
    :param field: metadata field, passed to
                  make_bedmaster_metadata_tensor_from_file
    :param numeric: selects CONTINUOUS (truthy) or LANGUAGE (falsy)
                    interpretation; also passed to the tensor_from_file
                    factory
    :return: the new TensorMap, with shape (None,)
    """
    if numeric:
        interpretation = Interpretation.CONTINUOUS
    else:
        interpretation = Interpretation.LANGUAGE

    metadata_tff = make_bedmaster_metadata_tensor_from_file(field, numeric)
    return TensorMap(
        name=f"{signal_name}_{field}",
        shape=(None,),
        interpretation=interpretation,
        tensor_from_file=metadata_tff,
        path_prefix=f"bedmaster/*/{signal_type}/{signal_name}",
    )
Пример #22
0
def create_sliding_window_outcome_tmap(tmap_name: str) -> Optional[TensorMap]:
    """
    Build a sliding-window outcome tensor map.

    The name must end with
        <W>_hrs_sliding_window_<tm_1>_to_<tm_2>_<S>_hrs_step_<P>_hrs_prediction_<G>_hrs_gap
    where W, S, P and G are integers and tm_1 / tm_2 are names resolved with
    get_signal_tmap. The visit tensor map is looked up under tm_2's name with
    "end_date"/"start_date" replaced by "first_visit".

    :param tmap_name: name of the requested tensor map
    :return: a CATEGORICAL TensorMap of shape (2,) whose channels are
             "no_<tm_2 name>" and "<tm_2 name>", or None if tmap_name does
             not match the format
    """
    name_pattern = re.compile(
        r"(\d+)_hrs_sliding_window_(.*)_to_(.*)_(\d+)_hrs_step"
        r"_(\d+)_hrs_prediction_(\d+)_hrs_gap$", )
    found = name_pattern.findall(tmap_name)
    if not found:
        return None

    window, first_name, second_name, step, prediction, gap = found[0]

    visit_name = re.sub(r"(end_date|start_date)", "first_visit", second_name)
    visit_tm = get_visit_tmap(visit_name)
    first_event_tm = get_signal_tmap(first_name)
    second_event_tm = get_signal_tmap(second_name)

    outcome_tff = make_sliding_window_outcome_tensor_from_file(
        window=int(window),
        step=int(step),
        prediction=int(prediction),
        gap=int(gap),
        event_tm_1=first_event_tm,
        event_tm_2=second_event_tm,
        visit_tm=visit_tm,
    )
    outcome = second_event_tm.name
    return TensorMap(
        name=tmap_name,
        shape=(2, ),
        tensor_from_file=outcome_tff,
        channel_map={
            f"no_{outcome}": 0,
            outcome: 1
        },
        path_prefix=second_event_tm.path_prefix,
        interpretation=Interpretation.CATEGORICAL,
        validators=validator_no_nans,
        time_series_limit=0,
    )
Пример #23
0
def make_c3po_death_tmap(
    tmap_name: str,
    tmaps: Dict[str, TensorMap],
) -> Dict[str, TensorMap]:
    """
    Add a C3PO death outcome tensor map to `tmaps` if `tmap_name` asks for one.

    The name must start with "c3po_death_<N>_years_post_ecg", where N is an
    integer passed as `years` to make_tensor_from_file_c3po_death.

    :param tmap_name: name of the requested tensor map
    :param tmaps: dictionary of tensor maps, updated in place
    :return: `tmaps`, with the new CATEGORICAL map stored under `tmap_name`
             when the name matches and unchanged otherwise
    """
    # Raw string: "\d" in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions
    pattern = r"c3po_death_(\d+)_years_post_ecg"
    years = re.match(pattern, tmap_name)
    if years is None:
        return tmaps

    tmaps[tmap_name] = TensorMap(
        name=tmap_name,
        channel_map={
            "no_death": 0,
            "death": 1
        },
        interpretation=Interpretation.CATEGORICAL,
        tensor_from_file=make_tensor_from_file_c3po_death(years=int(years[1])),
        validators=validator_not_all_zero,
        time_series_limit=0,
        path_prefix=C3PO_PREFIX,
    )
    return tmaps
Пример #24
0
    channel_map = dict()
    for idx, value in enumerate(values):
        channel_map[f"{feature}_{value}"] = idx
    return channel_map


# Categorical (non-binary)
# Register one CATEGORICAL tensor map in `tmaps` for every name in
# sts_features_categorical, keyed by that name.
for tmap_name in sts_features_categorical:
    # Both helpers are called with the tensor map name itself
    tff = _make_sts_tff_categorical(key=tmap_name)
    channel_map = _make_sts_categorical_channel_map(feature=tmap_name)

    # All maps share the STS path prefix, validator and time series filter
    tmaps[tmap_name] = TensorMap(
        name=tmap_name,
        interpretation=Interpretation.CATEGORICAL,
        path_prefix=STS_PREFIX,
        tensor_from_file=tff,
        channel_map=channel_map,
        validators=validator_not_all_zero,
        time_series_limit=0,
        time_series_filter=get_sts_surgery_dates,
    )

# Binary
for tmap_name in sts_features_binary:
    tff = _make_sts_tff_binary(
        key=tmap_name,
        negative_value=2,
        positive_value=1,
    )
    channel_map = outcome_channels(tmap_name)

    tmaps[tmap_name] = TensorMap(
Пример #25
0
def update_tmaps_window(
    tmap_name: str,
    tmaps: Dict[str, TensorMap],
) -> Dict[str, TensorMap]:
    """
    Make new tensor map from base tensor map, making conditional on a date from
    another source of data. This requires a precise format for tensor map name:
        [base_tmap_name]_[N]_days_[pre/post]_[other_data_source]
    e.g.
        ecg_2500_365_days_pre_echo
        ecg_2500_365_days_pre_sts_newest
        av_peak_gradient_30_days_post_echo
    or
        [base_tmap_name]_[N]_to_[N]_days_[pre/post]_[other_data_source]
    e.g.
        ecg_2500_30_to_90_days_pre_sts

    Additionally, a special tensor map can be created to get the days between
    cross referenced events by the following format:
        [source_name]_[N]_days_[pre/post]_[other_data_source]_days_between_matched_events
    e.g.
        ecg_180_days_pre_echo_days_between_matched_events

    NOTE(review): for "pre" ranges the first number is subtracted to get the
    window start and the second to get the window end, so the window is only
    non-empty when the first number is the larger one (e.g. 60_to_30). The
    30_to_90 "pre" example above therefore matches nothing as written;
    confirm which ordering is intended.

    :param tmap_name: name of the requested tensor map
    :param tmaps: dictionary of existing tensor maps, updated in place
    :return: `tmaps`, with the new tensor map added under its name when
             `tmap_name` matches the format and unchanged otherwise
    :raises ValueError: if a days-between map names a source that is not in
                        CROSS_REFERENCE_SOURCES, or if the base tensor map is
                        not in `tmaps`
    """

    pattern_string = (
        fr"(.*?)_(\d+_to_)?(\d+)_days_(pre|post)_({'|'.join(CROSS_REFERENCE_SOURCES)})"
        fr"(_days_between_matched_events)?")
    pattern = re.compile(pattern_string)
    match = pattern.match(tmap_name)
    if match is None:
        return tmaps

    # fmt: off
    # ecg_2500_std_30_to_180_days_pre_echo
    source_name = match[1]  # ecg_2500_std
    if match[2] is not None:  # "30_to_"
        offset_start = int(match[2].replace("_to_", ""))
    else:
        # Empty string marks "no range start given"
        offset_start = ""
    offset_end = int(match[3])  # 180
    pre_or_post = match[4]  # pre
    reference_name = match[5]  # echo
    days_between = match[6] or ""  # (empty string)
    # fmt: on

    offset_start_str = "" if offset_start == "" else f"{offset_start}_to_"
    new_name = f"{source_name}_{offset_start_str}{offset_end}_days_{pre_or_post}_{reference_name}{days_between}"

    # If the tmap should return the number of days between matched events,
    # source_name is the name of a source dataset
    if days_between:
        if source_name not in CROSS_REFERENCE_SOURCES:
            raise ValueError(
                f"Source dataset {source_name} not in known cross reference sources; "
                f"cannot create {new_name}", )
        source_prefix, source_dt_col = _get_dataset_metadata(
            dataset_name=source_name)

        # Setup time series filter, using the default time series filter if the source
        # datetime column is None
        if source_dt_col is not None:
            time_series_filter = lambda data: data[source_prefix][source_dt_col]
        else:
            time_series_filter = make_default_time_series_filter(source_prefix)

        # Create a fake base tmap which will be modified with a time series filter
        # function which returns the number of days between events
        base_tmap = TensorMap(
            name=source_name,
            shape=(1, ),
            interpretation=Interpretation.CONTINUOUS,
            path_prefix=source_prefix,
            tensor_from_file=_days_between_tensor_from_file,
            time_series_limit=0,
            time_series_filter=time_series_filter,
        )

    # If not getting days between events, source_name is the name of an underlying tmap
    # to filter and must exist
    elif source_name not in tmaps:
        raise ValueError(
            f"Base tmap {source_name} not in existing tmaps; cannot create {new_name}",
        )

    # If all checks pass, get base_tmap in the case that it is an existing tmap
    else:
        base_tmap = tmaps[source_name]

    # Copy the base_tmap to modify, either a real tmap or the fake one setup to get
    # the days between events
    new_tmap = copy.deepcopy(base_tmap)

    reference_prefix, reference_dt_col = _get_dataset_metadata(
        dataset_name=reference_name, )

    # One-to-one matching algorithm maximizes the number of matches by pairing events
    # nearest in time, starting from the most recent event.

    # 1. Sort source dates from newest -> oldest
    # 2. Sort reference dates from newest -> oldest
    # 3. For each reference date, starting from the newest reference date
    #     a. Compute relative time window
    #     b. Take the newest source date in range
    def get_cross_referenced_dates(data: PatientData) -> Dates:
        source_dates = base_tmap.time_series_filter(data)

        # Get dates from reference data
        reference_data = data[reference_prefix]
        if isinstance(reference_data, pd.DataFrame):
            reference_dates = reference_data[
                reference_dt_col]  # Reference data is CSV
        else:
            reference_dates = list(reference_data)  # Reference data is HD5

        # Convert everything to pd.Series of pd.Timestamp
        source_is_list = isinstance(source_dates, list)
        source_dates = pd.Series(source_dates).sort_values(ascending=False)
        source_dates_dt = pd.to_datetime(source_dates)
        reference_dates = pd.Series(reference_dates).sort_values(
            ascending=False)
        reference_dates = pd.to_datetime(reference_dates)

        # Set start and end dates relative to an event
        if pre_or_post == "pre":
            # e.g. 30_days_pre_echo
            if offset_start == "":
                start_dates = reference_dates + datetime.timedelta(
                    days=offset_end * -1)
                end_dates = reference_dates
            # e.g. 60_to_30_days_pre_echo
            else:
                start_dates = reference_dates + datetime.timedelta(
                    days=offset_start * -1, )
                end_dates = reference_dates + datetime.timedelta(
                    days=offset_end * -1)
        else:
            # e.g. 30_days_post_echo
            if offset_start == "":
                start_dates = reference_dates
                end_dates = reference_dates + datetime.timedelta(
                    days=offset_end)
            # e.g. 30_to_60_days_post_echo
            else:
                start_dates = reference_dates + datetime.timedelta(
                    days=offset_start)
                end_dates = reference_dates + datetime.timedelta(
                    days=offset_end)

        dates = pd.Series(dtype=object)
        day_differences = pd.Series(dtype=object)
        for start_date, end_date, reference_date in zip(
                start_dates,
                end_dates,
                reference_dates,
        ):
            # Get newest source date strictly inside the start/end window.
            # Explicit comparisons replace between(..., inclusive=False),
            # whose boolean `inclusive` argument newer pandas rejects.
            in_window = (source_dates_dt > start_date) & (source_dates_dt <
                                                          end_date)
            matched_date = source_dates_dt[in_window][:1]

            # If computing the days between events, calculate the day difference between
            # the reference date and the matched date
            if days_between:
                difference = reference_date - matched_date
                difference = difference.dt.total_seconds() / SECONDS_IN_DAY
                # pd.concat replaces Series.append, removed in pandas 2.0
                day_differences = pd.concat([day_differences, difference])

            # If not computing the days between events, return the actual dates
            else:
                # Computation is done on pd.Timestamp objects but returned list should
                # use the original strings/format in source_dates
                dates = pd.concat([dates, source_dates[matched_date.index]])

            # Remove the matched date from further matching
            source_dates_dt = source_dates_dt.drop(matched_date.index)

        if len(dates) == 0 and len(day_differences) == 0:
            raise ValueError("No cross referenced dates")

        if days_between:
            return day_differences
        elif source_is_list:
            return list(dates)
        else:
            return dates

    new_tmap.time_series_filter = get_cross_referenced_dates
    new_tmap.name = new_name
    tmaps[new_name] = new_tmap
    return tmaps
Пример #26
0
    def test_tensor_map_equality():
        """
        Check which differences between two tensor maps affect equality:
        a loss given by name or as the logcosh object, channel_map key order
        and metrics order must not; channel_map values and metrics contents
        must.
        """

        def build(loss, channel_map, metrics):
            # Every map in this test shares the same name and tensor_from_file
            return TensorMap(
                name="tm",
                loss=loss,
                channel_map=channel_map,
                metrics=metrics,
                tensor_from_file=pytest.TFF,
            )

        # Loss given by name
        by_name_a = build("logcosh", {"c1": 1, "c2": 2}, [])
        by_name_b = build("logcosh", {"c1": 1, "c2": 2}, [])
        # Loss given as an object, channel_map keys in either order
        by_object_a = build(logcosh, {"c1": 1, "c2": 2}, [])
        by_object_b = build(logcosh, {"c2": 2, "c1": 1}, [])
        # Different channel_map value
        other_channels = build(logcosh, {"c1": 1, "c2": 3}, [])
        # Different metrics
        one_metric = build(logcosh, {"c1": 1, "c2": 3}, [all])
        two_metrics_a = build(logcosh, {"c1": 1, "c2": 3}, [all, any])
        two_metrics_b = build(logcosh, {"c1": 1, "c2": 3}, [any, all])

        assert by_name_a == by_name_b
        assert by_object_a == by_object_b
        assert by_name_a == by_object_a
        assert two_metrics_a == two_metrics_b

        assert by_object_a != other_channels
        assert other_channels != one_metric
        assert other_channels != two_metrics_a
        assert one_metric != two_metrics_a
Пример #27
0
        normalizer = None
        if standardize == "_scaled":
            normalizer = RobustScalePopulation(
                median=echo_measures_continuous[tmap_name]["median"],
                iqr=echo_measures_continuous[tmap_name]["iqr"],
            )
        else:
            normalizer = None

        tmaps[tmap_name + standardize] = TensorMap(
            name=tmap_name + standardize,
            shape=(1, ),
            interpretation=Interpretation.CONTINUOUS,
            path_prefix=ECHO_PREFIX,
            tensor_from_file=_make_echo_tff_continuous(key=tmap_key),
            validators=RangeValidator(
                minimum=echo_measures_continuous[tmap_name]["min"],
                maximum=echo_measures_continuous[tmap_name]["max"],
            ),
            normalizers=normalizer,
            time_series_limit=0,
            time_series_filter=get_echo_dates,
        )

tmap_name = "echo_datetime"
tmaps[tmap_name] = TensorMap(
    name=tmap_name,
    shape=(1, ),
    interpretation=Interpretation.LANGUAGE,
    path_prefix=ECHO_PREFIX,
    tensor_from_file=_make_echo_tff_continuous(key=ECHO_DATETIME_COLUMN),
    validators=validator_no_nans,
Пример #28
0
def create_static_around_tmap(tm_name: str):
    """Build a static (age / length-of-stay) TensorMap from a structured name.

    The name is tried against these forms, in order:

    * ``age_<event>_(single|double)``
    * ``(age|length_of_stay)_<W>_hrs_sliding_window_<event1>_to_<event2>``
      ``_<S>_hrs_step``
    * ``length_of_stay_<N>_hrs_(pre|post)_<event>``

    Args:
        tm_name: Name of the requested tensor map.

    Returns:
        The TensorMap built for the first form that matches, or ``None`` when
        ``tm_name`` fits none of them.
    """

    def first_visit_tmap(event_name: str):
        # The visit map is looked up under the event name with every
        # "end_date" / "start_date" occurrence swapped for "first_visit".
        return get_visit_tmap(
            event_name.replace("end_date", "first_visit").replace(
                "start_date",
                "first_visit",
            ),
        )

    # Form 1: age at an event, with one or two samples.
    age_match = re.match(r"^age_(.*)_(single|double)$", tm_name)
    if age_match:
        event_proc, sample_kind = age_match.groups()
        visit_tm = first_visit_tmap(event_proc)
        samples = 1 if sample_kind == "single" else 2
        return TensorMap(
            name=tm_name,
            shape=(samples, ),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=admin_age_event_visit_tensor_from_file(
                visit_tm=visit_tm,
                samples=samples,
            ),
            path_prefix="edw/*",
        )

    # Form 2: age or length of stay over a sliding window between two events.
    sliding_match = re.match(
        r"^(age|length_of_stay)_(\d+)_hrs_sliding_window_(.*)"
        r"_to_(.*)_(\d+)_hrs_step$",
        tm_name,
    )
    if sliding_match:
        signal, window, event_name_1, event_name_2, step = (
            sliding_match.groups()
        )
        # The visit map is derived from the first event's name.
        visit_tm = first_visit_tmap(event_name_1)
        event_tm_1 = get_signal_tmap(event_name_1)
        event_tm_2 = get_signal_tmap(event_name_2)
        if signal == "age":
            tensor_from_file = admin_age_event_visit_tensor_from_file
        else:
            tensor_from_file = length_of_stay_sliding_window_tensor_from_file
        return TensorMap(
            name=tm_name,
            shape=(1, ),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=tensor_from_file(
                visit_tm=visit_tm,
                window=int(window),
                step=int(step),
                event_tm_1=event_tm_1,
                event_tm_2=event_tm_2,
            ),
            path_prefix="edw/*",
        )

    # Form 3: length of stay a fixed number of hours before/after an event.
    #
    # NOTE(review): the previous code also attempted a two-offset form
    # (length_of_stay_<N>_hrs_(pre|post)_..._<M>_hrs_(pre|post)_...), but only
    # after this single-offset pattern had already failed. Every name fitting
    # the two-offset pattern also fits this one, so that branch could never
    # match (and it unpacked 6 regex groups into 5 names). It is dropped here;
    # such names keep being parsed as a single offset whose event is
    # everything after the first "_(pre|post)_". Supporting two offsets
    # needs a decision on the naming grammar first.
    stay_match = re.match(
        r"^length_of_stay_(\d+)_hrs_(pre|post)_(.*)$",
        tm_name,
    )
    if stay_match is None:
        return None

    hours, period, event_name = stay_match.groups()
    visit_tm = first_visit_tmap(event_name)
    event_tm = get_signal_tmap(event_name)
    return TensorMap(
        name=tm_name,
        shape=(1, ),
        interpretation=Interpretation.CONTINUOUS,
        tensor_from_file=length_of_stay_event_tensor_from_file(
            visit_tm=visit_tm,
            event_tm=event_tm,
            hrs_to_event=[int(hours)],
            periods=[period],
        ),
        path_prefix="edw/*",
    )
Пример #29
0
def create_sliding_window_tmap(tmap_name: str) -> Optional[TensorMap]:
    """Build a sliding-window TensorMap from a structured tmap name.

    Expected name form::

        <signal>_<W>_hrs_sliding_window_<event1>_to_<event2>_<S>_hrs_step
            [_<feature>][_mean_imputation]

    where ``<feature>`` is one of the alternatives in ``FEATURES``. Without a
    feature suffix the feature is ``"raw"``.

    Args:
        tmap_name: Name of the requested tensor map.

    Returns:
        The TensorMap for the parsed window, or ``None`` when the name does
        not follow the sliding-window form.
    """
    imputation_type = None
    feature = "raw"
    remaining = tmap_name

    # Peel the optional suffixes off the right-hand side. The regex's captured
    # prefix is used instead of str.replace(), which would also delete any
    # earlier occurrence of "_<suffix>" inside the signal or event names.
    suffix_match = re.match(r"^(.*)_(mean_imputation)$", remaining)
    if suffix_match:
        remaining, imputation_type = suffix_match.group(1, 2)

    suffix_match = re.match(fr"^(.*)_({FEATURES})$", remaining)
    if suffix_match:
        remaining, feature = suffix_match.group(1, 2)

    window_match = re.match(
        r"^(.*)_(\d+)_hrs_sliding_window_(.*)_to_(.*)_(\d+)_hrs_step$",
        remaining,
    )
    if window_match is None:
        return None
    signal_name, window, event_name_1, event_name_2, step = (
        window_match.groups()
    )

    # Raw windows get two free axes, extracted features one; "_timeseries"
    # signals get a trailing axis of size 2 on top of that.
    shape = (None, None) if feature == "raw" else (None, )
    if signal_name.endswith("_timeseries"):
        shape += (2, )

    time_tm = get_time_tm(signal_name)
    # The visit map is derived from the second event's name.
    visit_tm = get_visit_tmap(
        re.sub(r"(end_date|start_date)", "first_visit", event_name_2),
    )
    signal_tm = _get_tmap(signal_name)
    event_tm_1 = get_signal_tmap(event_name_1)
    event_tm_2 = get_signal_tmap(event_name_2)

    return TensorMap(
        name=tmap_name,
        shape=shape,
        tensor_from_file=make_sliding_window_tensor_from_file(
            window=int(window),
            step=int(step),
            event_tm_1=event_tm_1,
            event_tm_2=event_tm_2,
            visit_tm=visit_tm,
            signal_tm=signal_tm,
            signal_time_tm=time_tm,
            feature=feature,
            imputation_type=imputation_type,
        ),
        # Channel map, path prefix and interpretation are inherited from the
        # underlying signal's tensor map.
        channel_map=signal_tm.channel_map,
        path_prefix=signal_tm.path_prefix,
        interpretation=signal_tm.interpretation,
        validators=validator_no_nans,
        time_series_limit=0,
    )
Пример #30
0
def create_around_tmap(tmap_name: str) -> Optional[TensorMap]:
    """Build an around-event TensorMap from a structured tmap name.

    Two name forms are recognized, the two-event form being tried first::

        <signal>_<T1>_hrs_(pre|post)_<event1>_<T2>_hrs_(pre|post)_<event2>
            _<W>_hrs_window
        <signal>_<T>_hrs_(pre|post)_<event>_<W>_hrs_window

    Either may be followed by an optional ``_<feature>`` suffix (one of the
    alternatives in ``FEATURES``; default ``"raw"``) and then an optional
    ``_mean_imputation`` or ``_sample_and_hold`` suffix.

    Args:
        tmap_name: Name of the requested tensor map.

    Returns:
        The TensorMap for the parsed name, or ``None`` when the name ends in
        ``_explore`` or does not follow either form.
    """
    if tmap_name.endswith("_explore"):
        return None

    imputation_type = None
    feature = "raw"
    event_name_2 = None
    remaining = tmap_name

    # Peel the optional suffixes off the right-hand side. The regex's captured
    # prefix is used instead of str.replace(), which would also delete any
    # earlier occurrence of "_<suffix>" inside the signal or event names.
    suffix_match = re.match(
        r"^(.*)_(mean_imputation|sample_and_hold)$",
        remaining,
    )
    if suffix_match:
        remaining, imputation_type = suffix_match.group(1, 2)

    suffix_match = re.match(fr"^(.*)_({FEATURES})$", remaining)
    if suffix_match:
        remaining, feature = suffix_match.group(1, 2)

    two_event_match = re.match(
        r"^(.*)_(\d+)_hrs_(pre|post)_(.*)_(\d+)_hrs_(pre|post)_(.*)"
        r"_(\d+)_hrs_window$",
        remaining,
    )
    if two_event_match:
        (
            signal_name,
            time_1,
            period_1,
            event_name,
            time_2,
            period_2,
            event_name_2,
            window,
        ) = two_event_match.groups()
        times = [int(time_1), int(time_2)]
        periods = [period_1, period_2]
    else:
        one_event_match = re.match(
            r"^(.*)_(\d+)_hrs_(pre|post)_(.*)_(\d+)_hrs_window$",
            remaining,
        )
        if one_event_match is None:
            return None
        signal_name, time, period, event_name, window = (
            one_event_match.groups()
        )
        times = [int(time)]
        periods = [period]

    # "_timeseries" signals carry a size-2 axis; raw (non-feature) output
    # additionally has a leading free axis.
    shape: Optional[Tuple[Axis, ...]]
    if signal_name.endswith("_timeseries"):
        shape = (None, 2) if feature == "raw" else (2, )
    else:
        shape = (None, ) if feature == "raw" else (1, )

    time_tm = get_time_tm(signal_name)
    # The visit map is derived from the first event's name.
    visit_tm = get_visit_tmap(
        re.sub(r"(end_date|start_date)", "first_visit", event_name),
    )
    signal_tm = _get_tmap(signal_name)
    event_tms = [get_signal_tmap(event_name)]
    if event_name_2 is not None:
        event_tms.append(get_signal_tmap(event_name_2))

    return TensorMap(
        name=tmap_name,
        shape=shape,
        tensor_from_file=make_around_event_tensor_from_file(
            times=times,
            periods=periods,
            window=int(window),
            feature=feature,
            event_tms=event_tms,
            visit_tm=visit_tm,
            signal_tm=signal_tm,
            signal_time_tm=time_tm,
            imputation_type=imputation_type,
        ),
        # Channel map, path prefix and interpretation are inherited from the
        # underlying signal's tensor map.
        channel_map=signal_tm.channel_map,
        path_prefix=signal_tm.path_prefix,
        interpretation=signal_tm.interpretation,
        validators=validator_no_nans,
    )