Exemplo n.º 1
0
def c_index_multiple_from_python(matrix,
                                 isdead,
                                 nbdays,
                                 matrix_test,
                                 isdead_test,
                                 nbdays_test,
                                 isfactor=False):
    """
    Fit a Cox model on a feature matrix and return its test concordance index.

    The training table is built from ``matrix`` with two extra columns,
    ``"isdead"`` and ``"nbdays"``; the test table is built the same way
    from the ``*_test`` arguments. A ``CoxPHFitter`` is fitted on the
    training table (``"nbdays"`` and ``"isdead"`` are handed to ``fit`` as
    its second and third positional arguments) with warnings silenced.

    ``isfactor`` is accepted but not used by this function.

    Returns the value of ``score(..., scoring_method="concordance_index")``
    on the test table, or ``np.nan`` if fitting raised; in that case the
    exception is printed. Errors raised while scoring are not caught.
    """

    def _survival_table(features, events, durations):
        # Wrap the features and append the two survival columns.
        table = pd.DataFrame(features)
        table["isdead"] = events
        table["nbdays"] = durations
        return table

    train_table = _survival_table(matrix, isdead, nbdays)
    test_table = _survival_table(matrix_test, isdead_test, nbdays_test)

    model = CoxPHFitter()

    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            model.fit(train_table, "nbdays", "isdead")
    except Exception as error:
        # Best effort: report the fitting failure and signal it with NaN.
        print(error)
        return np.nan
    else:
        return model.score(test_table, scoring_method="concordance_index")
Exemplo n.º 2
0
def fit_cox(
    train_df: Union[pd.DataFrame, str],
    covariates: List[str],
    test_df: Union[pd.DataFrame, str] = None,
    strata: List[str] = None,
    plot: bool = False,
    process_dir: str = None,
):
    """Fit a Cox proportional-hazards model and collect summary metrics.

    Args:
        train_df: Training data, or a path to a CSV file that is loaded with
            ``pd.read_csv``. Must contain ``"duration"`` and ``"event"``
            columns plus every covariate (and strata column, if any).
        covariates: Names of the columns used as regressors.
        test_df: Optional held-out data (frame or CSV path) with the same
            columns; when given, test metrics are added to the results.
        strata: Optional column name(s) to stratify on, forwarded to
            ``CoxPHFitter.fit``. A single name or a list is accepted.
        plot: When true *and* ``process_dir`` is set, the fitter's plot is
            saved as ``hazard_plot.pdf`` in ``process_dir``.
        process_dir: Optional output directory. When set, ``summary.csv``
            and ``results.json`` are written there (the latter through
            ``save_dict_to_json``, a helper defined outside this block).

    Returns:
        A ``(results, fitter)`` tuple. ``results`` is a dict with the keys
        ``"log_likelihood"``, ``"concordance_index"`` and
        ``"log_likelihood_ratio_test_pvalue"``, plus ``"test_log_likelihood"``
        and ``"test_concordance_index"`` when ``test_df`` is provided.
        ``fitter`` is the fitted ``CoxPHFitter``.
    """
    if isinstance(train_df, str):
        train_df = pd.read_csv(train_df)
    if isinstance(test_df, str):
        test_df = pd.read_csv(test_df)

    included_cols = ["duration", "event"] + list(covariates)
    if strata is not None:
        # The fitter looks the strata columns up in the frame it is given,
        # so they must survive the column selection below even when they
        # are not listed among the covariates.
        strata_cols = [strata] if isinstance(strata, str) else list(strata)
        included_cols += [
            col for col in strata_cols if col not in included_cols
        ]

    cphf = CoxPHFitter()
    print(train_df.columns)
    cphf.fit(
        train_df[included_cols],
        duration_col="duration",
        event_col="event",
        strata=strata,
    )

    results = {
        "log_likelihood":
        cphf.log_likelihood_,
        "concordance_index":
        cphf.concordance_index_,
        "log_likelihood_ratio_test_pvalue":
        cphf.log_likelihood_ratio_test().p_value,
    }

    if test_df is not None:
        # Score on the same column subset that was used for fitting.
        test_subset = test_df[included_cols]
        results["test_log_likelihood"] = cphf.score(
            test_subset, scoring_method="log_likelihood")
        results["test_concordance_index"] = cphf.score(
            test_subset, scoring_method="concordance_index")

    if plot and process_dir is not None:
        plt.figure(figsize=(5, 10))
        cphf.plot()
        plt.savefig(os.path.join(process_dir, "hazard_plot.pdf"))

    if process_dir is not None:
        cphf.summary.to_csv(os.path.join(process_dir, "summary.csv"))
        save_dict_to_json(os.path.join(process_dir, "results.json"), results)

    return results, cphf
def objective(trial, df_trn, df_val):
    """Score one hyper-parameter sample of a penalized Cox model.

    ``trial`` must provide ``suggest_loguniform``, ``suggest_uniform`` and a
    ``params`` attribute (presumably an Optuna trial; confirm against the
    caller). A ``penalizer`` is drawn log-uniformly from [1e-5, 1e2] and an
    ``l1_ratio`` uniformly from [0, 1]. A ``CoxPHFitter`` with those
    settings is fitted on ``df_trn`` using the ``'TIME'`` and ``'EVENT'``
    columns, and the value of ``score(df_val)`` with the fitter's default
    scoring method is returned.

    Progress messages and the sampled parameters are printed to stdout.
    """
    sampled = {
        'penalizer': trial.suggest_loguniform('penalizer', 1e-5, 1e2),
        'l1_ratio': trial.suggest_uniform('l1_ratio', 0.0, 1.0),
    }
    print(trial.params)

    print('Fitting...')
    model = CoxPHFitter(**sampled)
    model.fit(
        df_trn,
        duration_col='TIME',
        event_col='EVENT',
        show_progress=True,
    )
    print('done')

    return model.score(df_val)
Exemplo n.º 4
0
def c_index_from_python(values,
                        isdead,
                        nbdays,
                        values_test,
                        isdead_test,
                        nbdays_test,
                        isfactor=False):
    """
    Fit a single-covariate Cox model and return its test concordance index.

    Training and test tables each hold three columns: ``"values"``,
    ``"isdead"`` and ``"nbdays"``. When ``isfactor`` is true, both
    ``values`` and ``values_test`` are first converted to string arrays.
    A ``CoxPHFitter`` is fitted on the training table (``"nbdays"`` and
    ``"isdead"`` are handed to ``fit`` as its second and third positional
    arguments) with warnings silenced.

    Returns the value of ``score(..., scoring_method="concordance_index")``
    on the test table, or ``np.nan`` if fitting raised; in that case the
    exception is printed. Errors raised while scoring are not caught.
    """

    def _survival_table(covariate, events, durations):
        # One covariate column plus the two survival columns.
        return pd.DataFrame({
            "values": covariate,
            "isdead": events,
            "nbdays": durations
        })

    if isfactor:
        values, values_test = (np.asarray(column).astype("str")
                               for column in (values, values_test))

    train_table = _survival_table(values, isdead, nbdays)
    test_table = _survival_table(values_test, isdead_test, nbdays_test)

    model = CoxPHFitter()

    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            model.fit(train_table, "nbdays", "isdead")
    except Exception as error:
        # Best effort: report the fitting failure and signal it with NaN.
        print(error)
        return np.nan
    else:
        return model.score(test_table, scoring_method="concordance_index")
Exemplo n.º 5
0
# -*- coding: utf-8 -*-
# cox regression

if __name__ == "__main__":
    # Timing script for a stratified, spline-baseline CoxPHFitter on the
    # rossi dataset. The imports live inside the guard, so they only run
    # when the file is executed as a script.
    import pandas as pd
    import time
    import numpy as np

    from lifelines import CoxPHFitter
    from lifelines.datasets import load_rossi, load_regression_dataset

    # The dataset is concatenated with itself `reps` times
    # (1 keeps its original size).
    reps = 1
    df = load_rossi()
    # df['s'] = "a"
    df = pd.concat([df] * reps)
    print(df.shape)

    cph = CoxPHFitter(baseline_estimation_method="spline",
                      n_baseline_knots=3,
                      strata=["wexp"])
    # The timer starts after the fitter is constructed.
    start_time = time.time()
    cph.fit(df,
            duration_col="week",
            event_col="arrest",
            show_progress=True,
            timeline=np.linspace(1, 60, 100))
    print(cph.score(df))
    # The reported duration covers both the fit and the score call above.
    print("--- %s seconds ---" % (time.time() - start_time))
    # special for Cox
    # NOTE(review): from here on the code reads x_train, y_train, e_train,
    # x_val, y_val, e_val and p_to_drop, and appends to cph_cindex_trains,
    # cph_cindex_vals, cph_cindex_vals_events and ps. None of these names is
    # defined in this block; this looks like a fragment of a different
    # script. Confirm where they are supposed to come from.
    # Training table: features plus 'T' and 'E' columns.
    xy_train_df = pd.DataFrame(x_train)
    xy_train_df['T'] = y_train
    xy_train_df['E'] = e_train

    # Validation table, built the same way.
    xy_val_df = pd.DataFrame(x_val)
    xy_val_df['T'] = y_val
    xy_val_df['E'] = e_val

    # Validation rows whose 'E' value equals 1.
    xy_val_df_events = xy_val_df[xy_val_df['E'] == 1]

    # Rebinds `cph` to a new penalized fitter; 'T' and 'E' are passed as the
    # second and third positional arguments of fit.
    cph = CoxPHFitter(penalizer=0.1).fit(xy_train_df, 'T', 'E')

    # Negated partial hazards for the validation rows; not used again in the
    # lines visible here.
    preds = -cph.predict_partial_hazard(xy_val_df)

    # Concordance index on the training table, the validation table and the
    # validation subset with E == 1.
    cindex_train = cph.score(xy_train_df, scoring_method='concordance_index')
    cindex_val = cph.score(xy_val_df, scoring_method='concordance_index')
    cindex_val_events = cph.score(xy_val_df_events,
                                  scoring_method='concordance_index')

    # Record the three scores and the current p_to_drop in the accumulators.
    cph_cindex_trains.append(cindex_train)
    cph_cindex_vals.append(cindex_val)
    cph_cindex_vals_events.append(cindex_val_events)
    ps.append(p_to_drop)

    # Scores are printed multiplied by 100, with two decimals.
    print('Train cindex {:.2f}'.format(cindex_train * 100))
    print('Test cindex  {:.2f}'.format(cindex_val * 100))
    print('Test cindex Events Only {:.2f}'.format(cindex_val_events * 100))
    print(
        '=================================================================================================='
    )
Exemplo n.º 7
0
# -*- coding: utf-8 -*-
# cox regression

if __name__ == "__main__":
    # Timing script: fit a penalized, spline-baseline CoxPHFitter on the
    # rossi dataset, then print the elapsed fit time, the model summary and
    # two scores computed on the training data.
    import pandas as pd
    import time
    import numpy as np

    from lifelines import CoxPHFitter
    from lifelines.datasets import load_rossi, load_regression_dataset

    # Number of times the dataset is stacked on itself (1 = original size).
    reps = 1
    rossi = pd.concat([load_rossi()] * reps)

    fitter_options = {
        "penalizer": 0.1,
        "l1_ratio": 1.0,
        "baseline_estimation_method": "spline",
    }
    model = CoxPHFitter(**fitter_options)

    # Only the fit call itself is timed.
    started_at = time.time()
    model.fit(
        rossi,
        duration_col="week",
        event_col="arrest",
        show_progress=True,
    )
    elapsed = time.time() - started_at
    print("--- %s seconds ---" % (elapsed))

    model.print_summary(2)

    # Score with the fitter's default method, then the concordance index.
    default_score = model.score(rossi)
    print(default_score)
    concordance = model.score(rossi, scoring_method="concordance_index")
    print(concordance)