Example #1
def test_brier_times_too_large(nottingham_prognostic_index):
    pred, y = nottingham_prognostic_index([1825])

    with pytest.raises(
            ValueError,
            match="all times must be within follow-up time of test data:"):
        brier_score(y, y, pred, times=9999)
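For reference, brier_score raises this error whenever an evaluation time falls outside the follow-up time of the test data. A minimal self-contained sketch of a valid call on toy data (not part of the test suite; Surv.from_arrays builds the structured array that brier_score expects):

import numpy as np
from sksurv.metrics import brier_score
from sksurv.util import Surv

# Toy survival data: event indicator and observed time per sample.
event = np.array([True, False, True, True])
time = np.array([100.0, 250.0, 150.0, 300.0])
y = Surv.from_arrays(event=event, time=time)

# Constant predicted survival probability of 0.5 at t=150, which lies
# inside the follow-up range of the test data.
estimate = np.full((len(y), 1), 0.5)
times, score = brier_score(y, y, estimate, times=[150])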
Example #2
def test_brier_nottingham_many(nottingham_prognostic_index):
    times = [365, 730, 1095, 1460, 1825]
    pred, y = nottingham_prognostic_index(times)

    expected_score = numpy.array([
        0.0762922458520448,
        0.182536421174199,
        0.220017747254941,
        0.234133800146671,
        0.233822955042198,
    ])

    t1, score = brier_score(y, y, pred.squeeze(), times=times)
    assert_array_almost_equal(score, expected_score)

    t2, score = brier_score(y, y, pred.squeeze(), times=times[::-1])
    assert_array_almost_equal(score, expected_score)
    assert_array_equal(t1, t2)
Example #3
def compute_Brier_scores(model,
                         quantiles,
                         DATA_te,
                         DATA_tr,
                         pretrain_state,
                         risk=0):
    '''Compute Brier score based on the predicted CDF.

        Inputs:
        model: trained model
        quantiles: vector of times (event-time quantiles) at which the CDF is evaluated
        DATA_te: test data tuple (features, times, events)
        DATA_tr: train data tuple (features, times, events)
        pretrain_state: whether to use the pretrained model or not
        risk: risk index (default 0)
        '''

    cdf_preds = predict_cdf(model, DATA_te, quantiles, pretrain_state)
    cdf_preds = [np.exp(cdf.numpy()) for cdf in cdf_preds]

    _, t_valid, e_valid = DATA_te
    _, t_train, e_train = DATA_tr

    e_train = e_train.astype('bool')
    e_valid = e_valid.astype('bool')

    et1 = np.array([(e_train[i], t_train[i]) for i in range(len(e_train))],
                   dtype=[('e', bool), ('t', int)])
    et2 = np.array([(e_valid[i], t_valid[i]) for i in range(len(e_valid))],
                   dtype=[('e', bool), ('t', int)])

    cdf_br_25 = brier_score(et1, et2, cdf_preds[0], quantiles[0])
    cdf_br_50 = brier_score(et1, et2, cdf_preds[1], quantiles[1])
    cdf_br_75 = brier_score(et1, et2, cdf_preds[2], quantiles[2])
    cdf_br_m = brier_score(et1, et2, cdf_preds[3], quantiles[3])

    return np.squeeze(cdf_br_25[1]), np.squeeze(cdf_br_50[1]), np.squeeze(
        cdf_br_75[1]), np.squeeze(cdf_br_m[1])
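The structured arrays above can also be built without the per-row loop; a small equivalent sketch, assuming e_train/t_train and e_valid/t_valid as defined in the function:

et1 = np.empty(len(e_train), dtype=[('e', bool), ('t', int)])
et1['e'] = e_train
et1['t'] = t_train

et2 = np.empty(len(e_valid), dtype=[('e', bool), ('t', int)])
et2['e'] = e_valid
et2['t'] = t_valid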
Example #4
def test_brier_coxph():
    X, y = load_gbsg2()
    X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)

    Xt = OneHotEncoder().fit_transform(X)

    est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y)
    survs = est.predict_survival_function(Xt)

    preds = [fn(1825) for fn in survs]

    _, score = brier_score(y, y, preds, 1825)

    assert round(abs(score[0] - 0.208817407492645), 5) == 0
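A companion sketch (not part of the test): the same survival functions can be summarized over a grid of horizons with integrated_brier_score, reusing survs and y from the test above; the yearly grid is an illustrative choice.

import numpy as np
from sksurv.metrics import integrated_brier_score

grid = np.arange(365, 1826, 365)                      # yearly horizons
preds_grid = np.asarray([[fn(t) for t in grid] for fn in survs])
ibs = integrated_brier_score(y, y, preds_grid, grid)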
Example #5
def train_model():
    from dsm import datasets, DeepSurvivalMachines
    import numpy as np
    from sksurv.metrics import concordance_index_ipcw, brier_score

    survival_data = np.loadtxt('./new_survival_data.csv', delimiter=',')
    features = np.loadtxt('./new_features.csv', delimiter=',')

    x = features
    t = survival_data[:, 0]
    e = survival_data[:, 1]

    times = np.quantile(t[e == 1], [0.25, 0.5, 0.75]).tolist()

    cv_folds = 2
    folds = list(range(cv_folds))*10000
    folds = np.array(folds[:len(x)])


    cis = []
    brs = []
    for fold in range(cv_folds):

        print("On Fold:", fold)

        x_train, t_train, e_train = x[folds != fold], t[folds != fold], e[folds != fold]
        x_test, t_test, e_test = x[folds == fold], t[folds == fold], e[folds == fold]
        print(x_train.shape)

        model = DeepSurvivalMachines(distribution='Weibull', layers=[100])
        model.fit(x_train, t_train, e_train, iters=10, learning_rate=1e-3, batch_size=10)

        et_train = np.array([(e_train[i], t_train[i]) for i in range(len(e_train))],
                            dtype=[('e', bool), ('t', int)])

        et_test = np.array([(e_test[i], t_test[i]) for i in range(len(e_test))],
                        dtype=[('e', bool), ('t', int)])

        out_risk = model.predict_risk(x_test, times)
        out_survival = model.predict_survival(x_test, times)

        cis_ = []
        for i in range(len(times)):
            cis_.append(concordance_index_ipcw(et_train, et_test, out_risk[:, i], times[i])[0])
        cis.append(cis_)

        brs.append(brier_score(et_train, et_test, out_survival, times)[1])

    print("Concordance Index:", np.mean(cis, axis=0))
    print("Brier Score:", np.mean(brs, axis=0))
Example #6
def test_brier_wrong_estimate_shape(nottingham_prognostic_index):
    pred, y = nottingham_prognostic_index([720, 1825])

    with pytest.raises(ValueError,
                       match="expected estimate with 2 columns, but got 1"):
        brier_score(y, y, pred[:, :1], times=[720, 1825])

    with pytest.raises(ValueError,
                       match="expected estimate with 3 columns, but got 2"):
        brier_score(y, y, pred, times=[720, 960, 1825])

    with pytest.raises(ValueError,
                       match="expected estimate with 686 samples, but got 10"):
        brier_score(y, y, pred[:10], times=[720, 1825])
Example #7
def test_brier_wrong_estimate_shape(nottingham_prognostic_index):
    pred, y = nottingham_prognostic_index([720, 1825])

    with pytest.raises(ValueError,
                       match="expected estimate with 2 columns, but got 1"):
        brier_score(y, y, pred[:, :1], times=[720, 1825])

    with pytest.raises(ValueError,
                       match="expected estimate with 3 columns, but got 2"):
        brier_score(y, y, pred, times=[720, 960, 1825])

    with pytest.raises(
            ValueError,
            match=
            r"Found input variables with inconsistent numbers of samples: \[686, 10\]"
    ):
        brier_score(y, y, pred[:10], times=[720, 1825])
Example #8
def test_brier_nottingham(brier_npi_data):
    pred, y, times, expected_score = brier_npi_data

    _, score = brier_score(y, y, pred.squeeze(), times=times)
    assert round(abs(score[0] - expected_score), 6) == 0
Example #9
def stratified_brier_score(
    maximum_brier_eval_time,
    survival_data_train,
    survival_data_test,
    risk_score_train,
    risk_score_test,
    strata_train,
    strata_test,
    stratified_fitted=True,
    save_stratified_scores=True,
    minimum_brier_eval_time=None,
):

    event_time_train = survival_data_train["event_time"]
    event_time_test = survival_data_test["event_time"]

    # Per-stratum minimum/maximum event times in the training data
    min_strata_train = [
        np.min(event_time_train[strata_train == s])
        for s in np.unique(strata_train)
    ]
    max_strata_train = [
        np.max(event_time_train[strata_train == s])
        for s in np.unique(strata_train)
    ]

    # Get boolean indexer array for event times in test data, which are
    # smaller/greater than minimum/maximum event time in train data
    extends_strata_min = [(np.min(event_time_train[strata_train == s]) >
                           event_time_test[strata_test == s])
                          for s in np.unique(strata_test)]
    extends_strata_max = [(np.max(event_time_train[strata_train == s]) <
                           event_time_test[strata_test == s])
                          for s in np.unique(strata_test)]

    min_strata_test = []
    max_strata_test = []

    for s, e_min_mask, e_max_mask in zip(np.unique(strata_train),
                                         extends_strata_min,
                                         extends_strata_max):
        if e_min_mask.any():
            min_strata_test.append(
                np.min(event_time_test[strata_test == s][~e_min_mask]))
        else:
            min_strata_test.append(np.min(event_time_test[strata_test == s]))
        if e_max_mask.any():
            max_strata_test.append(
                np.max(event_time_test[strata_test == s][~e_max_mask]))
        else:
            max_strata_test.append(np.max(event_time_test[strata_test == s]))

    # Choose the maximum of the minimal values within the strata
    event_time_strata_min = np.max(min_strata_train + min_strata_test)
    # Choose the minimum of maximal values within the strata
    event_time_strata_max = np.min(max_strata_train + max_strata_test)

    if event_time_strata_max < maximum_brier_eval_time:
        pec_largest_eval_time = event_time_strata_max
    else:
        pec_largest_eval_time = maximum_brier_eval_time

    if (minimum_brier_eval_time is not None
            and event_time_strata_min < minimum_brier_eval_time):
        pec_smallest_eval_time = minimum_brier_eval_time
    else:
        pec_smallest_eval_time = event_time_strata_min

    # Final evaluation times for brier score
    eval_times_brier_score = np.arange(start=pec_smallest_eval_time,
                                       stop=pec_largest_eval_time - 1,
                                       step=20)

    survival_train_groups = []
    survival_test_groups = []

    risk_train_groups = []
    risk_test_groups = []

    strata_indicator_train_groups = []
    strata_indicator_test_groups = []

    for strata in np.unique(strata_train):

        risk_score_strata_train = risk_score_train[strata == strata_train]
        risk_score_strata_test = risk_score_test[strata == strata_test]

        survival_strata_train = survival_data_train[strata == strata_train]
        survival_strata_test = survival_data_test[strata == strata_test]

        strata_indicator_train = strata_train[strata == strata_train]
        strata_indicator_test = strata_test[strata == strata_test]

        # Check that testing times lie within range of training times.
        extends_train_min = (np.min(survival_strata_train["event_time"]) >
                             survival_strata_test["event_time"])

        if extends_train_min.any():
            risk_score_strata_test = risk_score_strata_test[~extends_train_min]
            survival_strata_test = survival_strata_test[~extends_train_min]
            strata_indicator_test = strata_indicator_test[~extends_train_min]

        extends_train_max = (np.max(survival_strata_train["event_time"]) <
                             survival_strata_test["event_time"])

        if extends_train_max.any():
            risk_score_strata_test = risk_score_strata_test[~extends_train_max]
            survival_strata_test = survival_strata_test[~extends_train_max]
            strata_indicator_test = strata_indicator_test[~extends_train_max]

        survival_train_groups.append(survival_strata_train)
        survival_test_groups.append(survival_strata_test)
        risk_train_groups.append(risk_score_strata_train)
        risk_test_groups.append(risk_score_strata_test)
        strata_indicator_train_groups.append(strata_indicator_train)
        strata_indicator_test_groups.append(strata_indicator_test)

    predictions = []

    if stratified_fitted:
        for train_data, train_risk, test_risk in zip(
                survival_train_groups,
                risk_train_groups,
                risk_test_groups,
        ):
            # Fit Breslow Estimator on Training Data.
            breslow = BreslowEstimator()
            breslow.fit(
                train_risk,
                train_data["event_indicator"],
                train_data["event_time"],
            )

            # Predict Survival Probability on Test Data.
            surv_funcs = breslow.get_survival_function(test_risk)
            prob_preds = [fn(eval_times_brier_score) for fn in surv_funcs]

            # Append stratified data.
            predictions.append(prob_preds)
    else:
        breslow = BreslowEstimator()
        breslow.fit(
            np.concatenate(risk_train_groups),
            np.concatenate(survival_train_groups)["event_indicator"],
            np.concatenate(survival_train_groups)["event_time"],
        )

        surv_funcs = breslow.get_survival_function(
            np.concatenate(risk_test_groups))
        prob_preds = [fn(eval_times_brier_score) for fn in surv_funcs]

        predictions.append(prob_preds)

    total_brier_scores = []
    group_sizes = []

    strata_train = np.concatenate(strata_indicator_train_groups)
    strata_test = np.concatenate(strata_indicator_test_groups)
    survival_data_train = np.concatenate(survival_train_groups)
    survival_data_test = np.concatenate(survival_test_groups)
    predictions = np.concatenate(predictions)

    for strata in np.unique(strata_train):
        train_dat = survival_data_train[strata_train == strata]
        test_dat = survival_data_test[strata_test == strata]
        preds = predictions[strata_test == strata]

        # IPCW weights are handled inside the imported brier_score function
        _, strata_brier_score = brier_score(
            train_dat,
            test_dat,
            preds,
            eval_times_brier_score,
        )

        total_brier_scores.append(strata_brier_score)
        group_sizes.append(len(preds))

    if save_stratified_scores:
        return eval_times_brier_score, np.average(np.stack(total_brier_scores),
                                                  weights=group_sizes,
                                                  axis=0)
    else:
        return eval_times_brier_score, total_brier_scores, group_sizes
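A hedged usage sketch for stratified_brier_score; the survival records, risk scores, and strata labels below are toy placeholders whose field names merely match what the function accesses:

import numpy as np

toy_dtype = [("event_indicator", bool), ("event_time", float)]
toy_train = np.array([(True, 12.0), (False, 30.0), (True, 25.0)], dtype=toy_dtype)
toy_test = np.array([(True, 15.0), (False, 28.0)], dtype=toy_dtype)

eval_times, avg_scores = stratified_brier_score(
    maximum_brier_eval_time=25.0,
    survival_data_train=toy_train,
    survival_data_test=toy_test,
    risk_score_train=np.array([0.4, -0.1, 0.9]),
    risk_score_test=np.array([0.2, 0.5]),
    strata_train=np.array([0, 0, 0]),
    strata_test=np.array([0, 0]),
)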
Example #10
def main(args):
    """
    Runs evaluation for the data set
        1. Loads model from tar.gz
        2. Reads in test features
        3. Runs an accuracy report
        4. Generates feature importance with SHAP

    Args:
        model-name (str): Name of the trained model, default xgboost
        test-features (str): preprocessed test features for evaluation
            and SHAP, default test_features.csv
        train-features (str): preprocessed train features for SHAP,
            default train_features.csv
        report-name (str): Name of the evaluation output,
            default evaluation.json
        shap-name (str): Name of the SHAP feature importance output file,
            default shap.csv
        threshold (float): Threshold to cut probabilities at, default 0.5
        tau (int): time range for the c-index will be from 0 to tau,
            default 100
    """

    model_path = os.path.join("/opt/ml/processing/model", "model.tar.gz")

    logger.info(f"Extracting model from path: {model_path}")

    with tarfile.open(model_path) as tar:
        tar.extractall(path=".")
    logger.info("Loading model")
    with open(args.model_name, "rb") as f:
        model = pickle.load(f)

    logger.info("Loading train and test data")

    test_features_data = os.path.join("/opt/ml/processing/test",
                                      args.test_features)
    train_features_data = os.path.join("/opt/ml/processing/train",
                                       args.train_features)

    X_test = pd.read_csv(test_features_data, header=0)
    X_train = pd.read_csv(train_features_data, header=0)

    y_test = X_test.iloc[:, 0]
    y_train = X_train.iloc[:, 0]

    # Reverse transform to event and duration columns
    y_test_df = pd.DataFrame(
        np.vstack((np.where(y_test > 0, 1, 0), np.abs(y_test))).T,
        columns=["event", "duration"],
    )

    y_train_df = pd.DataFrame(
        np.vstack((np.where(y_train > 0, 1, 0), np.abs(y_train))).T,
        columns=["event", "duration"],
    )

    X_test.drop(X_test.columns[0], axis=1, inplace=True)
    X_train.drop(X_train.columns[0], axis=1, inplace=True)

    logger.info("Running inference")

    predictions = model.predict(xgboost.DMatrix(X_test.values[:, 1:]),
                                output_margin=False)

    logger.info("Creating evaluation report")

    # NOTE: technically this evaluation is not a classification task
    # TODO: Normalize to 0 to 1 scale
    report_dict = classification_report(y_test_df["event"],
                                        predictions > args.threshold,
                                        output_dict=True)
    report_dict["accuracy"] = accuracy_score(y_test_df["event"],
                                             predictions > args.threshold)

    _, y_train_tuple = get_x_y(y_train_df, ["event", "duration"],
                               pos_label=True)
    _, y_test_tuple = get_x_y(y_test_df, ["event", "duration"], pos_label=True)

    concordance_index = concordance_index_ipcw(
        y_train_tuple,
        y_test_tuple,
        predictions,
        tau=args.tau,  # default within 100 days
    )

    report_dict["concordance_index"] = {
        "cindex": float(concordance_index[0]),
        "concordant": int(concordance_index[1]),
        "discordant": int(concordance_index[2]),
        "tied_risk": int(concordance_index[3]),
        "tied_time": int(concordance_index[4]),
    }

    times, score = brier_score(y_train_tuple, y_test_tuple, predictions,
                               y_test_df["duration"].max() - 1)

    report_dict["brier_score"] = {
        "times": times.astype(np.int32).tolist(),
        "score": score.astype(np.float32).tolist(),
    }

    logger.info(f"Classification report:\n{report_dict}")

    evaluation_output_path = os.path.join("/opt/ml/processing/evaluation",
                                          args.report_name)
    logger.info(f"Saving classification report to {evaluation_output_path}")

    logger.debug(report_dict)

    with open(evaluation_output_path, "w") as f:
        f.write(json.dumps(report_dict))

    # SHAP
    latest_job_debugger_artifacts_path = "/opt/ml/processing/debug/debug-output"
    trial = create_trial(latest_job_debugger_artifacts_path)

    shap_values = trial.tensor("full_shap/f0").value(trial.last_complete_step)

    pd.DataFrame(shap_values).to_csv(
        os.path.join("/opt/ml/processing/evaluation", args.shap_name))

    shap_no_base = shap_values[1:, :-1]
    feature_names = X_train.columns
    os.makedirs("/opt/ml/processing/plot/", exist_ok=True)
    logger.info(f"{shap_values.shape} {shap_no_base.shape} {X_train.shape}")
    shap.summary_plot(shap_no_base,
                      features=X_train,
                      feature_names=feature_names,
                      show=False)
    plt.savefig("/opt/ml/processing/plot/feature_importance.png",
                bbox_inches="tight")
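Note that brier_score expects estimated survival probabilities, while main() passes the model's risk predictions directly. A hedged sketch of the conversion (reusing names from main(); train_margins is a hypothetical array of margins predicted on the training set), following the BreslowEstimator pattern of Example #9:

from sksurv.linear_model.coxph import BreslowEstimator

breslow = BreslowEstimator().fit(
    train_margins,                              # hypothetical training-set margins
    y_train_df["event"].astype(bool).values,
    y_train_df["duration"].values,
)
surv_funcs = breslow.get_survival_function(predictions)
eval_time = y_test_df["duration"].max() - 1     # assumed to lie within the training follow-up
surv_probs = [fn(eval_time) for fn in surv_funcs]
times, score = brier_score(y_train_tuple, y_test_tuple, surv_probs, eval_time)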
Example #11
    )

    for s in np.unique(strata):

        strata_train_dat = survival_data_train[strata_train == s]
        strata_test_dat = survival_data_test[strata_test == s]

        kaplan_preds = np.repeat(
            [kmf.predict(eval_times_brier_score).to_numpy()],
            strata_test_dat.shape[0],
            axis=0,
        )

        times, km_score = brier_score(
            survival_train=strata_train_dat,
            survival_test=strata_test_dat,
            estimate=kaplan_preds,
            times=eval_times_brier_score,
        )

        kaplan_brier_scores.append(km_score)
        kaplan_group_sizes.append(strata_test_dat.shape[0])

    kmf_brier_scores = np.average(np.stack(kaplan_brier_scores),
                                  weights=kaplan_group_sizes,
                                  axis=0)

    lasso_strat_eval_times, lasso_strat_brier_scores = stratified_brier_score(
        MAX_EVAL_TIME_PEC,
        survival_data_train,
        survival_data_test,
        lasso_linear_predictor_strat_train.to_numpy(),