def test_aaf_panel_dataset(self, block):
    """Visual check: fit an AalenAdditiveFitter on the panel test data and plot it.

    The frame returned by ``load_panel_test()`` is fitted with ``id`` as the
    subject id column, ``t`` as the duration column and ``E`` as the event
    column. ``block`` is forwarded to ``plt.show`` unchanged.
    """
    frame = load_panel_test()
    model = AalenAdditiveFitter()
    model.fit(frame, id_col="id", duration_col="t", event_col="E")
    model.plot()

    canvas = self.plt
    canvas.title("test_aaf_panel_dataset")
    canvas.show(block=block)
def test_aalen_additive_fit_with_censor(self, block):
    """Visual check of the fitted cumulative hazards when some events are censored.

    Hazards are simulated with ``generate_hazard_rates``, lifetimes drawn with
    ``generate_random_lifetimes`` (infinite lifetimes are replaced by 10), and
    the event flags are Binomial(1, 0.99) draws. For each of the first
    ``d + 1`` coefficient columns, one subplot overlays the curve obtained from
    ``cumulative_integral`` with the fitted curve, both restricted to timeline
    labels up to 15. ``block`` is forwarded to ``plt.show`` unchanged.
    """
    n, d = 2500, 6
    n_panels = d + 1  # NOTE(review): presumably d covariates plus a constant column - confirm
    timeline = np.linspace(0, 70, 10000)

    hz, coef, X = generate_hazard_rates(n, d, timeline)
    X.columns = coef.columns
    integrated = cumulative_integral(coef.values, timeline)
    reference = pd.DataFrame(integrated, index=timeline, columns=coef.columns)

    lifetimes = generate_random_lifetimes(hz, timeline)
    lifetimes[np.isinf(lifetimes)] = 10
    X["T"] = lifetimes
    X["E"] = np.random.binomial(1, 0.99, n)

    fitter = AalenAdditiveFitter()
    fitter.fit(X, "T", "E")

    for row in range(n_panels):
        axis = self.plt.subplot(n_panels, 1, row + 1)
        name = reference.columns[row]
        # Draw the reference curve first, then the fitted curve on the same axes.
        axis = reference[name].loc[:15].plot(ax=axis)
        fitter.plot(loc=slice(0, 15), ax=axis, columns=[name])

    self.plt.title("test_aalen_additive_fit_with_censor")
    self.plt.show(block=block)
def test_aalen_additive_plot(self, block):
    """Visual check of the fitted cumulative hazards on simulated data.

    Lifetimes are simulated from ``generate_hazard_rates`` /
    ``generate_random_lifetimes`` (infinite lifetimes are replaced by 10) and
    every event flag is a Binomial(1, 1.0) draw. The plot leaves out the last
    100 rows of ``cumulative_hazards_``. ``block`` is forwarded to ``plt.show``
    unchanged.
    """
    n, d = 2500, 3
    timeline = np.linspace(0, 70, 10000)

    hz, _, X = generate_hazard_rates(n, d, timeline)
    lifetimes = generate_random_lifetimes(hz, timeline)
    lifetimes[np.isinf(lifetimes)] = 10
    observed = np.random.binomial(1, 1.0, size=n)
    X["T"] = lifetimes
    X["E"] = observed

    # No intercept is fitted: per the original author's note, X already carries
    # a column of ones (X[2]).
    fitter = AalenAdditiveFitter(coef_penalizer=0.1, fit_intercept=False)
    fitter.fit(X, "T", "E")

    n_rows, _ = fitter.cumulative_hazards_.shape
    axes = fitter.plot(iloc=slice(0, n_rows - 100))
    axes.set_xlabel("time")
    axes.set_title("test_aalen_additive_plot")
    self.plt.show(block=block)
# Survival regression (Aalen additive model) on the covariates State,
# Couple_Race, Household_Income_Range, Husband_Education, Husband_Race and
# Marriage_Date.
# The trailing "-1" removes patsy's own intercept term from the design matrix;
# fit_intercept=True below asks the fitter for one instead (presumably the
# 'baseline' column plotted further down - confirm).
X = patsy.dmatrix(
    'State + Couple_Race + Household_Income_Range + Husband_Education + Husband_Race + Marriage_Date -1',
    data,
    return_type='dataframe')
aaf = AalenAdditiveFitter(coef_penalizer=1.0, fit_intercept=True)
X['T'] = data['Duration']
X['E'] = data['Divorce']
aaf.fit(X, 'T', event_col='E')

sns.set()

# `loc` is the label-based slice keyword of the plot method; it replaces the
# legacy `ix` spelling, which mirrored the removed pandas `.ix` indexer.
aaf.plot(columns=[
    'State[Alabama]', 'baseline', 'Couple_Race[T.Same-Race]',
    'Household_Income_Range[T.42,830$ - 44,765$]'
], loc=slice(1, 15))
plt.savefig(
    '/home/raed/Dropbox/INSE - 6320/Final Project/Survival_Regression_for_Alabamae.pdf'
)
plt.show()

aaf.plot(columns=[
    'State[Mississippi]', 'baseline', 'Couple_Race[T.Same-Race]',
    'Household_Income_Range[T.42,830$ - 44,765$]'
], loc=slice(1, 15))
plt.savefig(
    '/home/raed/Dropbox/INSE - 6320/Final Project/Survival_Regression_for_Mississippi.pdf'
)
from lifelines import AalenAdditiveFitter, CoxPHFitter
from lifelines.datasets import generate_regression_dataset

# Fit a Cox proportional-hazards model and an Aalen additive model on the same
# dataset, then plot predicted survival curves for a few rows and the fitted
# Aalen curves.
regression_dataset = generate_regression_dataset()

cf = CoxPHFitter()
cf.fit(regression_dataset, duration_col='T', event_col='E')

aaf = AalenAdditiveFitter(fit_intercept=False)
aaf.fit(regression_dataset, duration_col='T', event_col='E')

# Covariates only: drop the event and duration columns. `columns - [...]`
# (Index set-subtraction) is no longer supported by pandas.
x = regression_dataset.drop(['E', 'T'], axis=1)

# `.loc` is label-based and end-inclusive, like the removed `.ix` was for an
# integer index, so this still selects the rows labelled 10 through 12.
aaf.predict_survival_function(x.loc[10:12]).plot()
aaf.plot()
# Fit an Aalen additive model of policy lapse on dummy-encoded policy
# attributes, pickle the fitted model and plot it.

# Rows with a zero duration are excluded before modelling.
df = df[df['Duration'] != 0]
df2 = df.loc[:, [
    'DISTRIBUTION CHANNEL', 'GENDER', 'SMOKER STATUS', 'PremiumPattern',
    'BENEFITS TYPE', 'BROKER COMM'
]]
# Smaller covariate sets, kept disabled:
#df2 = df.loc[:, ['GENDER', 'SMOKER STATUS', 'PremiumPattern']]
#df2 = df.loc[:, ['SMOKER STATUS', 'GENDER']]
df2 = pd.get_dummies(df2)

# NOTE(review): T is used below but its only assignment visible here is
# commented out; presumably it is defined earlier in the file - confirm,
# otherwise `df2['T'] = T` raises NameError.
#T = df['Duration']

# Event indicator: True where LapseIndicator equals 1.
E = df['LapseIndicator'] == 1
df2['E'] = E
df2['T'] = T

aaf = AalenAdditiveFitter()
aaf.fit(df2, 'T', event_col='E', show_progress=True)

# Context manager so the pickle file is flushed and closed deterministically.
with open('Smoker_Gender_All.pkl', 'wb') as model_file:
    pickle.dump(aaf, model_file)
aaf.plot()

# Cox proportional-hazards variant, kept disabled:
#cph = CoxPHFitter()
#cph.fit(df2, duration_col='T', event_col='E', show_progress=True, strata=['SMOKER STATUS_No','SMOKER STATUS_Yes',
#                                                                         'GENDER_F', 'GENDER_M'])
#pickle.dump(cph, open('Smoker_Gender_CPF.pkl', 'wb'))
#cph.plot()