Exemplo n.º 1
0
 def test_coxph_plotting_with_subset_of_columns(self, block):
     """Fit a Cox model on the regression dataset and plot only ``var1`` and ``var2``.

     ``block`` is forwarded to ``plt.show`` unchanged.
     """
     regression_df = load_regression_dataset()
     fitter = CoxPHFitter()
     fitter.fit(regression_df, duration_col="T", event_col="E")
     # Restrict the coefficient plot to the two named covariates.
     fitter.plot(columns=["var1", "var2"])
     # Title the figure with the test name.
     self.plt.title("test_coxph_plotting_with_subset_of_columns")
     self.plt.show(block=block)
Exemplo n.º 2
0
 def test_coxph_plotting(self, block):
     """Fit a Cox model on the regression dataset and plot it with default arguments.

     ``block`` is forwarded to ``plt.show`` unchanged.
     """
     regression_df = load_regression_dataset()
     fitter = CoxPHFitter()
     fitter.fit(regression_df, duration_col="T", event_col="E")
     fitter.plot()
     # Title the figure with the test name.
     self.plt.title("test_coxph_plotting")
     self.plt.show(block=block)
Exemplo n.º 3
0
 def test_coxph_plotting_with_hazards_ratios(self, block):
     """Fit a Cox model on the regression dataset and plot it with ``hazard_ratios=True``.

     ``block`` is forwarded to ``plt.show`` unchanged.
     """
     df = load_regression_dataset()
     cp = CoxPHFitter()
     cp.fit(df, "T", "E")
     cp.plot(hazard_ratios=True)
     # Title the figure with this test's own name; it previously reused the
     # title of ``test_coxph_plotting``, making the two figures
     # indistinguishable by title.
     self.plt.title("test_coxph_plotting_with_hazards_ratios")
     self.plt.show(block=block)
Exemplo n.º 4
0
def fit_cox(
    train_df: Union[pd.DataFrame, str],
    covariates: List[str],
    test_df: Union[pd.DataFrame, str, None] = None,
    strata: Union[List[str], None] = None,
    plot: bool = False,
    process_dir: Union[str, None] = None,
):
    """Fit a Cox proportional hazards model and collect summary metrics.

    Args:
        train_df: Training data, or a path to a CSV file to read it from.
            Must contain ``"duration"`` and ``"event"`` columns plus every
            covariate (and every strata column, if any).
        covariates: Column names used as model covariates.
        test_df: Optional held-out data (or CSV path) with the same columns;
            when given, the fitted model is also scored on it.
        strata: Optional column name(s) to stratify on; forwarded to
            ``CoxPHFitter.fit``.
        plot: When true *and* ``process_dir`` is set, save the coefficient
            plot as ``hazard_plot.pdf`` in ``process_dir``.
        process_dir: Optional output directory. When set, ``summary.csv`` and
            ``results.json`` are written there.

    Returns:
        A ``(results, cphf)`` tuple: the metrics dict and the fitted
        ``CoxPHFitter``.
    """
    if isinstance(train_df, str):
        train_df = pd.read_csv(train_df)
    if isinstance(test_df, str):
        test_df = pd.read_csv(test_df)

    included_cols = ["duration", "event"] + list(covariates)
    # The fitter looks the strata columns up in the frame it is given, so they
    # must survive the column subset even when they are not covariates.
    strata_cols = [strata] if isinstance(strata, str) else list(strata or [])
    for col in strata_cols:
        if col not in included_cols:
            included_cols.append(col)

    print(train_df.columns)
    cphf = CoxPHFitter()
    cphf.fit(
        train_df[included_cols],
        duration_col="duration",
        event_col="event",
        strata=strata,
    )

    results = {
        "log_likelihood": cphf.log_likelihood_,
        "concordance_index": cphf.concordance_index_,
        "log_likelihood_ratio_test_pvalue":
        cphf.log_likelihood_ratio_test().p_value,
    }

    if test_df is not None:
        held_out = test_df[included_cols]
        results["test_log_likelihood"] = cphf.score(
            held_out, scoring_method="log_likelihood")
        results["test_concordance_index"] = cphf.score(
            held_out, scoring_method="concordance_index")

    if plot and process_dir is not None:
        fig = plt.figure(figsize=(5, 10))
        cphf.plot()
        plt.savefig(os.path.join(process_dir, "hazard_plot.pdf"))
        # Release the figure once saved so repeated calls do not accumulate
        # open figures.
        plt.close(fig)

    if process_dir is not None:
        cphf.summary.to_csv(os.path.join(process_dir, "summary.csv"))
        save_dict_to_json(os.path.join(process_dir, "results.json"), results)

    return results, cphf
def survival_analyze(dataframe,
                     lifetime_col,
                     dead_col,
                     strata_cols,
                     covariate_col=None):
    """Fit a stratified Cox model and draw its diagnostic plots.

    Args:
        dataframe: Data passed to ``CoxPHFitter.fit``.
        lifetime_col: Name of the duration column.
        dead_col: Name of the event-indicator column.
        strata_cols: Column(s) forwarded as ``strata``.
        covariate_col: Optional covariate; when truthy, its effect is plotted
            for the values 0 and 1.

    Returns:
        None. The function only produces matplotlib plots.
    """
    # Based on notebook here. https://github.com/CamDavidsonPilon/lifelines/tree/master/examples
    from matplotlib import pyplot as plt
    from lifelines import CoxPHFitter

    cph = CoxPHFitter().fit(dataframe,
                            lifetime_col,
                            dead_col,
                            strata=strata_cols)
    # The coefficient plot needs an axes to draw on; none was ever created
    # here (the previous ``ax[1]`` referred to an undefined name), so make a
    # dedicated one.
    _, ax = plt.subplots()
    cph.plot(ax=ax)
    if covariate_col:
        # NOTE(review): newer lifelines releases appear to rename this method
        # to ``plot_partial_effects_on_outcome`` - confirm against the pinned
        # version before upgrading.
        cph.plot_covariate_groups(covariate_col, values=[0, 1])
Exemplo n.º 6
0
def CoxAnalysis(pd_data, pd_surval, tp):
    """Run univariate or multivariable Cox regression and save table and plot.

    Args:
        pd_data: DataFrame of covariates, one column per feature.
        pd_surval: DataFrame holding the ``'OS'`` (duration) and ``'status'``
            (event) columns, on the same row index as ``pd_data``.
        tp: ``'univariate'`` fits one model per column of ``pd_data`` and
            stacks the summaries; ``'multivariable'`` fits a single model on
            all columns after dropping rows with missing values.

    Raises:
        ValueError: if ``tp`` is not one of the two supported modes, or if
            ``pd_data`` has no columns in univariate mode.

    Side effects:
        Writes ``CoxRegress.txt`` (tab-separated summary) and
        ``CoxRegress.pdf``. In univariate mode the plot shows the model
        fitted last, since the same fitter object is refitted per column.
    """
    if tp not in ('univariate', 'multivariable'):
        raise ValueError(
            "tp must be 'univariate' or 'multivariable', got %r" % (tp,))

    # ``DataFrame.append`` no longer exists in pandas 2.x; frames are joined
    # with ``pd.concat`` instead. Imported locally so the block is
    # self-sufficient.
    import pandas as pd

    cph = CoxPHFitter(penalizer=0.1)
    if tp == 'univariate':
        summaries = []
        for i in range(pd_data.shape[1]):
            # Survival columns plus exactly one covariate column.
            df = pd.concat([pd_surval, pd_data.iloc[:, [i]]], axis=1)
            cph.fit(df, 'OS', event_col='status')
            summaries.append(cph.summary)
        if not summaries:
            raise ValueError("pd_data has no columns to analyse")
        pd_out = pd.concat(summaries)
    else:
        df = pd.concat([pd_data, pd_surval], axis=1)
        df = df.dropna(axis=0, how='any')
        cph.fit(df, 'OS', event_col='status', step_size=0.1)
        pd_out = cph.summary

    pd_out.to_csv('CoxRegress.txt', sep='\t', header=True, index=True)
    plt.style.use('my-paper')
    fig, axe = plt.subplots(figsize=(25, 8))
    cph.plot(ax=axe)
    axe.set_ylim(-0.2, 3.2)
    axe.set_xlim(-2.5, 2.1)
    plt.savefig('CoxRegress.pdf')
Exemplo n.º 7
0
from lifelines import CoxPHFitter
import matplotlib.pyplot as plt
import json
import pandas as pd

# Read the raw records; the file is only needed for this one call.
with open("test_data_Cox.json", "r", encoding='UTF-8') as f:
    temp = json.load(f)

# Keep the duration ('T'), event ('E') and the three covariate columns.
regression_dataset = pd.DataFrame(
    temp, columns=['T', 'E', 'base', 'hotestVideo', 'length'])

print(regression_dataset.head())
print(regression_dataset['E'].value_counts())

# Fit the Cox model, print its summary and save the coefficient plot.
cph = CoxPHFitter()
cph.fit(regression_dataset, 'T', event_col='E')
cph.print_summary()
cph.plot()
plt.savefig('Cox3.png')
Exemplo n.º 8
0
    cph.plot()
        
    ''')

    # Fit and display the Cox model only when the user ticks the checkbox.
    if st.checkbox("Fit Cox model"):
        with st.spinner('Construction in progress...'):
            cph = CoxPHFitter()

            # Add the event and duration column names to the selection so they
            # are part of the frame passed to fit(). This mutates col_list in
            # place.
            col_list.extend(['Churn', 'Tenure'])

            cph.fit(df[col_list], duration_col='Tenure', event_col='Churn')

            # NOTE(review): the 'seaborn-ticks' style name may not exist in
            # recent matplotlib releases - confirm against the pinned version.
            plt.style.use('seaborn-ticks')

            # The return value is not used below; styling goes through the
            # pyplot state machine instead.
            fig = cph.plot()

            plt.tick_params(axis='both', which='major', labelsize=20)

            plt.xlabel("Log Hazard Ratio", fontsize=22)

            plt.tight_layout()

            # No figure is passed explicitly here.
            # NOTE(review): presumably this renders the current pyplot figure;
            # confirm that st.pyplot accepts `height` in the Streamlit version
            # in use.
            st.pyplot(height=400)

            # Clear the current figure after rendering.
            plt.clf()

        st.markdown('''
        The **hazard ratio** (HR) of each variable provides a measure of how it influences the probability of the event of interest, in 
        this case customer churn, happening at a given point in time. 
        
Exemplo n.º 9
0
    print(
        np.mean(
            k_fold_cross_validation(fitter,
                                    times,
                                    duration_col='time',
                                    event_col='success')))
    print("End cross-validation of " + name)


from lifelines import CoxPHFitter
# Fit a Cox proportional hazards model on `times`, with 'time' as the duration
# column and 'success' as the event column.
cph = CoxPHFitter()
cph.fit(times, duration_col='time', event_col='success', show_progress=True)

cph.print_summary()

# Pass the result of cph.plot() to `save` under the name 'coxph'
# (`save` is defined outside this excerpt).
save('coxph', cph.plot())
#cph.check_assumptions(times, show_plots=True)

# NOTE(review): `crossValidate` is presumably the helper whose tail appears
# just before this section (it prints a mean k-fold score) - confirm.
crossValidate('cox', cph)

# Mapping of short model name -> fitted model; only the Cox model is added here.
fitters = {'cox': cph}

from lifelines import WeibullAFTFitter, LogNormalAFTFitter, LogLogisticAFTFitter
# Fit each accelerated-failure-time model class on the same data and print its
# summary. `fitter` is the class itself; an instance is created per iteration.
for (name, fitter) in [("weibull", WeibullAFTFitter),
                       ("lognormal", LogNormalAFTFitter),
                       ("loglogistic", LogLogisticAFTFitter)]:
    print("BEGIN " + name)
    aft = fitter()
    aft.fit(times, duration_col='time', event_col='success')
    # NOTE(review): the positional 3 is presumably the number of decimals to
    # print - confirm against the lifelines print_summary signature.
    aft.print_summary(3)
Exemplo n.º 10
0
    delimiter="\t")

# Encode the event indicator numerically: 'YES' -> 1, 'NO' -> 0.
class_codes = {'YES': 1, 'NO': 0}
regression_dataset['Class'] = regression_dataset['Class'].map(class_codes)

# Add one indicator column per distinct 'clusters' value, then drop the
# original 'clusters' column together with 'id'.
cluster_dummies = pd.get_dummies(regression_dataset['clusters'])
regression_dataset = regression_dataset.join(cluster_dummies).drop(
    columns=['id', 'clusters'])

print(regression_dataset.head(100))
print(regression_dataset.dtypes)

# Kaplan-Meier estimate of the survival function, using 'Time' as the duration
# and 'Class' as the event indicator.
kmf = KaplanMeierFitter()
kmf.fit(regression_dataset['Time'], event_observed=regression_dataset['Class'])

survival_function = kmf.plot_survival_function()  # or just kmf.plot()
km_figure = survival_function.get_figure()
km_figure.savefig("KaplanMeierFitter.png")
# Clear the figure once it is saved. cph.plot() below is called without an
# explicit axes, so it would otherwise draw on top of the Kaplan-Meier curve
# and CoxPHFitter.png would contain both plots.
km_figure.clf()

# Using Cox Proportional Hazards model
cph = CoxPHFitter(penalizer=0.1)
cph.fit(regression_dataset, 'Time', event_col='Class')
cph.print_summary()

# predict=cph.predict_survival_function(regression_dataset).plot()
# predict.get_figure().savefig("predicted.png")

ax = cph.plot()
ax.get_figure().savefig("CoxPHFitter.png")
Exemplo n.º 11
0
                                          y_te_eicu, X_te_mimic, y_te_mimic)
    # Flag coefficients via `neq_zero` (helper not shown here; presumably it
    # marks non-zero coefficients - confirm). The result is used as a boolean
    # mask below.
    nzeros = neq_zero(best_cph.params_)
    # Names and values of the flagged coefficients, as plain Python lists.
    coefs = nzeros.index[nzeros.values].to_list()
    coefs_val = best_cph.params_[nzeros.values].tolist()

    # Record for this model: penalizer, the three scores computed earlier in
    # the loop, and the selected features with their coefficients.
    summary = {
        "penalizer": p,
        "C_train": ctr,
        # NOTE(review): this key ends with a colon, unlike its siblings -
        # likely a typo. Confirm that nothing reads "C_eicu:" before renaming.
        "C_eicu:": ceicu,
        "C_mimic": cmimic,
        "nfeatures": len(coefs),
        "features": coefs,
        "coefs": coefs_val,
        "df": pd.DataFrame({
            "features": coefs,
            "coefs": coefs_val
        })
    }
    cph_results[best_ps[i]] = summary
    best_cphs.append(best_cph)
    # Changes the global default figure size for figures created from here on.
    plt.rcParams['figure.figsize'] = [5, 10]
    # NOTE(review): plot() appears to return an Axes rather than a Figure, and
    # the figure is closed right after - confirm that `figures` is still
    # usable by whoever consumes it.
    fig = best_cph.plot()
    figures.append(fig)
    plt.close()
    print(summary)

# Persist the collected per-model summaries as a pickle, then echo them.
summary_path = "{}/{}_summary.pkl".format(model_path, tag)
with open(summary_path, "wb") as summary_file:
    pickle.dump(cph_results, summary_file)

print(cph_results)