Exemplo n.º 1
0
 def test_coxph_plot_covariate_groups_with_single_strata(self, block):
     """Visual check: covariate-group curves for a model stratified on ``paro``.

     Fits a ``CoxPHFitter`` on the Rossi data with ``strata="paro"`` and
     plots the curves for ``age`` set to 10, 50 and 80.

     Args:
         block: forwarded to ``plt.show(block=...)``.
     """
     df = load_rossi()
     cp = CoxPHFitter()
     cp.fit(df, "week", "arrest", strata="paro")
     cp.plot_covariate_groups("age", [10, 50, 80])
     # Title the figure after this test so it can be identified on screen
     # (it previously carried the name of a different test).
     self.plt.title("test_coxph_plot_covariate_groups_with_single_strata")
     self.plt.show(block=block)
Exemplo n.º 2
0
 def test_coxph_plot_covariate_groups_with_multiple_variables(self, block):
     """Visual check: covariate-group curves varying ``age`` and ``prio`` together.

     Args:
         block: forwarded to ``plt.show(block=...)``.
     """
     rossi = load_rossi()
     fitter = CoxPHFitter()
     fitter.fit(rossi, "week", "arrest")

     # Each inner list holds one (age, prio) combination to plot.
     covariates = ["age", "prio"]
     combinations = [[10, 0], [50, 10], [80, 90]]
     fitter.plot_covariate_groups(covariates, combinations)

     figure_title = "test_coxph_plot_covariate_groups_with_multiple_variables"
     self.plt.title(figure_title)
     self.plt.show(block=block)
Exemplo n.º 3
0
 def test_coxph_plot_covariate_groups(self, block):
     """Visual check: covariate-group curves for ``age`` on an unstratified model.

     Args:
         block: forwarded to ``plt.show(block=...)``.
     """
     rossi = load_rossi()
     fitter = CoxPHFitter()
     fitter.fit(rossi, "week", "arrest")

     ages = [10, 50, 80]
     fitter.plot_covariate_groups("age", ages)

     figure_title = "test_coxph_plot_covariate_groups"
     self.plt.title(figure_title)
     self.plt.show(block=block)
Exemplo n.º 4
0
 def test_coxph_plot_covariate_groups_with_nonnumeric_strata(self, block):
     """Visual check: covariate-group curves with a string-valued strata column.

     Adds a ``strata`` column of randomly chosen "A"/"B" labels to the Rossi
     data, fits a ``CoxPHFitter`` stratified on it, and plots the curves for
     ``age`` set to 10, 50 and 80.

     Args:
         block: forwarded to ``plt.show(block=...)``.
     """
     df = load_rossi()
     df["strata"] = np.random.choice(["A", "B"], size=df.shape[0])
     cp = CoxPHFitter()
     cp.fit(df, "week", "arrest", strata="strata")
     cp.plot_covariate_groups("age", [10, 50, 80])
     # Title the figure after this test; the copy-pasted title used to name
     # the single-strata test instead.
     self.plt.title("test_coxph_plot_covariate_groups_with_nonnumeric_strata")
     self.plt.show(block=block)
Exemplo n.º 5
0
 def test_coxph_plot_covariate_groups_with_multiple_variables_and_strata(self, block):
     """Visual check: ``age``/``prio`` covariate groups on a model with string strata.

     Args:
         block: forwarded to ``plt.show(block=...)``.
     """
     rossi = load_rossi()
     # Random "A"/"B" label per row, used as the strata column.
     n_rows = rossi.shape[0]
     rossi["strata"] = np.random.choice(["A", "B"], size=n_rows)

     fitter = CoxPHFitter()
     fitter.fit(rossi, "week", "arrest", strata="strata")

     # Each inner list holds one (age, prio) combination to plot.
     covariates = ["age", "prio"]
     combinations = [[10, 0], [50, 10], [80, 90]]
     fitter.plot_covariate_groups(covariates, combinations)

     figure_title = "test_coxph_plot_covariate_groups_with_multiple_variables_and_strata"
     self.plt.title(figure_title)
     self.plt.show(block=block)
def survival_analyze(dataframe,
                     lifetime_col,
                     dead_col,
                     strata_cols,
                     covariate_col=None):
    """Fit a stratified Cox proportional hazards model and plot the result.

    Based on the lifelines example notebooks:
    https://github.com/CamDavidsonPilon/lifelines/tree/master/examples

    Args:
        dataframe: data handed to ``CoxPHFitter.fit``.
        lifetime_col: name of the duration column.
        dead_col: name of the event column.
        strata_cols: forwarded to ``fit`` as ``strata=``.
        covariate_col: optional covariate name; when truthy, the covariate
            groups for the values 0 and 1 are plotted as well.

    Returns:
        None. The function is called for its plotting side effects.
    """
    from matplotlib import pyplot as plt
    from lifelines import CoxPHFitter

    cph = CoxPHFitter().fit(dataframe,
                            lifetime_col,
                            dead_col,
                            strata=strata_cols)

    # The model plot used to target ``ax[1]``, but no ``ax`` is created in
    # this function, so the call raised NameError. Draw on a fresh axes.
    _, model_ax = plt.subplots()
    cph.plot(ax=model_ax)

    if covariate_col:
        cph.plot_covariate_groups(covariate_col, values=[0, 1])
# Apply to_one_hot to the train, validation and test frames with the same
# `to_encode` argument.
one_hot_train = to_one_hot(df_train, to_encode)
one_hot_val = to_one_hot(df_val, to_encode)
one_hot_test = to_one_hot(df_test, to_encode)

# Show the validation frame's column names and how many there are.
print(one_hot_val.columns.tolist())
print(f"There are {len(one_hot_val.columns)} columns")

# Print the training frame's shape. The bare `.head()` expression only
# displays anything in a notebook/REPL; in a script its result is discarded.
print(one_hot_train.shape)
one_hot_train.head()

# Fit a CoxPHFitter on the training frame, with 'time' as the duration column
# and 'status' as the event column.
cph = CoxPHFitter()
cph.fit(one_hot_train, duration_col='time', event_col='status', step_size=0.1)

# Print the fitted model's summary.
cph.print_summary()

# Plot the model's curves for 'edema_1.0' set to 0 and to 1.
cph.plot_covariate_groups('edema_1.0', values=[0, 1])


def hazard_ratio(case_1, case_2, cox_params):
    """Return the hazard ratio of ``case_1`` relative to ``case_2``.

    Computed as ``exp(cox_params . (case_1 - case_2))``.

    Args:
        case_1: covariate values of the first individual.
        case_2: covariate values of the second individual.
        cox_params: Cox model coefficients, aligned with the covariates.

    Returns:
        The exponentiated dot product of the coefficients with the
        covariate difference.
    """
    covariate_diff = case_1 - case_2
    log_ratio = np.dot(cox_params, covariate_diff)
    return np.exp(log_ratio)


# First case: the training row at position 1, with the 'time' and 'status'
# columns dropped.
i = 1
case_1 = one_hot_train.iloc[i, :].drop(['time', 'status'])

# Second case: the training row at position 5, with the same two columns dropped.
j = 5
case_2 = one_hot_train.iloc[j, :].drop(['time', 'status'])
Exemplo n.º 8
0
# </summary>
# <p>
# <ul>
#     <li>You should see that the treatment (trt) was beneficial because it has a negative impact on the hazard (the coefficient is negative, and exp(coef) is less than 1).</li>
#     <li>The associated hazard ratio is ~0.8, because this is the exp(coef) of treatment.</li>
# </ul>
# </p>

# We can compare the predicted survival curves for treatment variables. Run the next cell to plot survival curves using the `plot_covariate_groups()` function.
# - The y-axis is the survival rate
# - The x-axis is time

# In[15]:

# Plot the model's curves for `trt` set to 0 and to 1.
cph.plot_covariate_groups('trt', values=[0, 1])

# Notice how the group without treatment has a lower survival rate at all times (the x-axis is time) compared to the treatment group.

# <a name='6'></a>
# ## 6. Hazard Ratio
#
# Recall from the lecture videos that the Hazard Ratio between two patients is the ratio of their hazards: how many times more at risk one patient (e.g., a smoker) is than the other (e.g., a non-smoker).
# $$
# \frac{\lambda_{smoker}(t)}{\lambda_{nonsmoker}(t)} = e^{\theta (X_{smoker} - X_{nonsmoker})^T}
# $$
#
# Where
#
# $$
# \lambda_{smoker}(t) = \lambda_0(t)e^{\theta X_{smoker}^T}
# Print the fitted model's summary.
cph.print_summary()

# There are a few important things to notice about this output:
# 1. The number of observations is listed as n=5634 at the top of the output, next to the number of events
#    (churned customers).
# 2. We get the coefficients of the model. These are very important: they tell us how each feature changes risk.
#    A positive coefficient means the attribute makes a customer more likely to churn; a negative one means
#    customers with that feature are less likely to churn.
# 3. We get significance codes for our features. A very nice addition!
# 4. We get the concordance. Our model has a concordance of .929 out of 1, so it is a very good Cox model. We can
#    use this to compare models, somewhat like accuracy in logistic regression.

# Let's plot all of this to get a better picture.
cph.plot()
cph.plot_covariate_groups('TotalCharges', values=[0,4000], cmap='coolwarm')
# The survival-curve plot shows that customers with TotalCharges closer to 0 are at a higher risk of churning
# than those with charges closer to 4000.

# Now that we have some insight into what makes customers churn, let's do some churn prediction.
# Keep only the customers who have not churned (we cannot retain those who already have); in
# survival-analysis terms these are the censored subjects.
censored_subjects = data.loc[data['Churn_Yes'] == 0]

# Predict their unconditioned survival curves. They are "unconditioned" because they still predict
# some churn before each customer's current tenure.
unconditioned_sf = cph.predict_survival_function(censored_subjects)

# Condition each curve on the customer's current tenure: divide by the survival value at that tenure
# and cap the result at 1. `Series.clip_upper` was deprecated in pandas 0.24 and removed in 1.0;
# `clip(upper=1)` is the equivalent that works on both old and new pandas.
conditioned_sf = unconditioned_sf.apply(lambda c: (c / c.loc[data.loc[c.name, 'tenure']]).clip(upper=1))
# Take a peek at one of the transformed data sets with the new features.
# The bare `.head()` expression only displays anything in a notebook/REPL.
print(one_hot_train.shape)
one_hot_train.head()

# Fit a Cox Proportional Hazards model with the `lifelines` package, using 'time' as the
# duration column and 'status' as the event column.
cph = CoxPHFitter()
cph.fit(one_hot_train, duration_col = 'time', event_col = 'status', step_size=0.1)


# Use `cph.print_summary()` to view the coefficients associated with each covariate as well as confidence intervals.
cph.print_summary()


# Plot survival curves with `plot_covariate_groups()` for `trt` set to 0 and to 1.
cph.plot_covariate_groups('trt', values=[0, 1]); # Compares the predicted survival curves across the treatment variable.


# Write a function to compute the hazard ratio between two individuals given the cox model's coefficients
def hazard_ratio(case_1, case_2, cox_params):
    '''
    Return the hazard ratio of case_1 : case_2 using
    the coefficients of the cox model.

    Computed as exp(cox_params . (case_1 - case_2)^T).

    Args:
        case_1 (np.array): (1 x d) array of covariates
        case_2 (np.array): (1 x d) array of covariates
        cox_params (np.array): (1 x d) array of cox model coefficients
    Returns:
        hazard_ratio (float): hazard ratio of case_1 : case_2
            (a 1 x 1 array when the inputs are 2-D row vectors)
    '''
    # Transposing the difference lets both flat (d,) vectors and (1 x d) row
    # vectors work; for 1-D input the transpose is a no-op.
    covariate_diff = np.transpose(case_1 - case_2)
    return np.exp(np.dot(cox_params, covariate_diff))
Exemplo n.º 11
0
# Durations come from the adjusted time-to-delivery column; the event flag is
# True where 'Vaginal Delivery' equals 'Yes'.
T = data['Adjusted Time to Delivery']
C = (data['Vaginal Delivery'] == 'Yes')

# Fit on the rows where `ix` is False and start the plot (blue).
# `kmf` and `ix` are defined outside this fragment — presumably a
# KaplanMeierFitter and a boolean mask for extended-interval dosing; TODO confirm.
# NOTE(review): newer lifelines releases treat `alpha` as a significance level
# (e.g. 0.05) rather than a confidence level — confirm .95 suits the installed version.
kmf.fit(T[~ix], C[~ix], label='Routine Interval Dosing', alpha=.95)
ax = kmf.plot(show_censors=True, color='b')

# Refit on the rows where `ix` is True and overlay the curve on the same axes (red).
kmf.fit(T[ix], C[ix], label='Extended Interval Dosing', alpha=.95)
kmf.plot(ax=ax, show_censors=True, color='r')

# Fix the axis ranges, label the figure, and save it.
ax.set_xlim(0, 60)
ax.set_ylim(0, 1)
plt.xlabel('Time to Delivery (hrs)')
plt.ylabel('Frac. Not Delivered')
plt.title(
    'Delay Adjusted Time to Vaginal Delivery as a Function of Dosing Interval')
plt.savefig('output/adjusted_KM.png')

# Fit a Cox model of time to delivery with cumulative delay as the covariate.
# NOTE(review): earlier in this script 'Vaginal Delivery' is compared against
# 'Yes'; confirm the column is numeric/boolean when used as event_col here.
cph = CoxPHFitter()
cph.fit(data[['Time to Delivery', 'Vaginal Delivery', 'CumulativeDelay']],
        duration_col='Time to Delivery',
        event_col='Vaginal Delivery',
        show_progress=True)

# plot_covariate_groups draws on a new figure and returns its axes. Rebind `ax`
# to that return value so the limits below apply to this plot; previously they
# were applied to the earlier `ax`, whose figure had already been saved.
ax = cph.plot_covariate_groups('CumulativeDelay', [0, 2, 4, 8, 16])
ax.set_xlim(0, 60)
ax.set_ylim(0, 1)
plt.xlabel('Time to Delivery (hrs)')
plt.ylabel('Frac. Not Delivered')
plt.title('Predicted Delivery Curves as a Function of Cumulative Dosing Delay')
plt.savefig('output/predicted_KM.png')