def test_log_rank_test_on_waltons_dataset():
    """The log-rank test on miR-137 rows vs. all other rows must give p < 0.05."""
    waltons = load_waltons()
    is_mir137 = waltons['group'] == 'miR-137'

    # Durations only; no event-observed column is passed to the test.
    durations_mir137 = waltons.loc[is_mir137, 'T']
    durations_rest = waltons.loc[~is_mir137, 'T']

    outcome = stats.logrank_test(durations_mir137, durations_rest)
    assert outcome.p_value < 0.05
def test_survival_difference_at_fixed_point_in_time_test_nonparametric():
    """Compare two Kaplan-Meier fits (miR-137 vs. the rest) at time 10; expect p < 0.05."""
    waltons = load_waltons()
    is_mir137 = waltons["group"] == "miR-137"
    mir137_rows = waltons.loc[is_mir137]
    other_rows = waltons.loc[~is_mir137]

    kmf_mir137 = KaplanMeierFitter().fit(mir137_rows["T"], mir137_rows["E"])
    kmf_other = KaplanMeierFitter().fit(other_rows["T"], other_rows["E"])

    outcome = stats.survival_difference_at_fixed_point_in_time_test(10, kmf_mir137, kmf_other)
    assert outcome.p_value < 0.05
def test_waltons_dataset():
    """Log-rank test between the miR-137 rows and the remaining rows yields p < 0.05."""
    data = load_waltons()
    mir137_mask = data['group'] == 'miR-137'

    mir137_T = data.loc[mir137_mask, 'T']
    remaining_T = data.loc[~mir137_mask, 'T']

    test_result = stats.logrank_test(mir137_T, remaining_T)
    assert test_result.p_value < 0.05
def test_survival_table_from_events_at_risk_column():
    """Check the ``at_risk`` column of the Waltons survival table against reference values."""
    waltons = load_waltons()

    # from R
    expected = [
        163.0, 162.0, 160.0, 157.0, 154.0, 152.0, 151.0, 148.0,
        144.0, 139.0, 134.0, 133.0, 130.0, 128.0, 126.0, 119.0,
        118.0, 108.0, 107.0, 99.0, 96.0, 89.0, 87.0, 69.0,
        65.0, 49.0, 38.0, 36.0, 27.0, 24.0, 14.0, 1.0,
    ]

    table = utils.survival_table_from_events(waltons['T'], waltons['E'])
    at_risk = table['at_risk']

    # skip the first event as that is the birth time, 0.
    assert list(at_risk[1:]) == expected
def test_rmst_plot_with_single_model(self, block):
    """Render ``rmst_plot`` for a single Kaplan-Meier fit of the Waltons data with t=40.0."""
    data = load_waltons()
    durations, observed = data["T"], data["E"]
    fitted = KaplanMeierFitter().fit(durations, observed)

    rmst_plot(fitted, t=40.0)

    pyplot = self.plt
    pyplot.title("test_rmst_plot_with_single_model")
    pyplot.show(block=block)
def test_kmf_add_at_risk_counts_with_custom_subplot(self, block, kmf):
    """Draw at-risk count tables under KM curves placed on GridSpec subplots.

    Builds a 3-row figure; in each row the "control" and non-control
    Kaplan-Meier curves are plotted on the same axes and
    ``add_at_risk_counts`` is attached to those axes.
    """
    # https://github.com/CamDavidsonPilon/lifelines/issues/991#issuecomment-614427882
    import lifelines
    import matplotlib as mpl
    from lifelines.datasets import load_waltons

    plt = self.plt
    waltons = load_waltons()
    ix = waltons["group"] == "control"  # boolean mask selecting the control rows

    img_no = 3  # number of stacked subplots

    height = 4 * img_no  # figure height in inches: 4 per subplot
    half_inch = 0.5 / height  # in percent height
    _fig = plt.figure(figsize=(6, height), dpi=100)
    gs = mpl.gridspec.GridSpec(img_no, 1)
    # plt.subplots_adjust(left=0.08, right=0.98, bottom=half_inch, top=1 - half_inch)

    for i in range(img_no):
        ax = plt.subplot(gs[i, 0])
        kmf_control = lifelines.KaplanMeierFitter()
        ax = kmf_control.fit(waltons.loc[ix]["T"], waltons.loc[ix]["E"], label="control").plot(ax=ax)
        kmf_exp = lifelines.KaplanMeierFitter()
        ax = kmf_exp.fit(waltons.loc[~ix]["T"], waltons.loc[~ix]["E"], label="exp").plot(ax=ax)
        ax = lifelines.plotting.add_at_risk_counts(kmf_exp, kmf_control, ax=ax)

    # Extra vertical spacing between the stacked subplots.
    plt.subplots_adjust(hspace=0.6)
    plt.title("test_kmf_add_at_risk_counts_with_custom_subplot")
    plt.show(block=block)
def test_hide_ci_from_legend(self, block):
    """Plot a KM curve with line-only confidence intervals that are left out of the legend."""
    data = load_waltons()
    fitted = KaplanMeierFitter().fit(data["T"], data["E"])

    plot_options = dict(ci_show=True, ci_only_lines=True, ci_legend=False)
    axes = fitted.plot(**plot_options)
    axes.legend(title="Legend title")

    self.plt.title("test_hide_ci_from_legend")
    self.plt.show(block=block)
def test_correct_output_for_complex_input(self):
    """Run the task with durations, categories and observed events (Nelson-Aalen).

    Builds three long-format frames with columns ``id``, ``feature``,
    ``value`` from the Waltons data and checks that the result has stats
    for both groups (``control`` and ``miR-137``) in subset 0.
    """
    df = load_waltons()
    df.insert(0, 'id', df.index)

    # Use rename() rather than writing into ``columns.values``: the latter
    # mutates the backing array of an immutable Index, which pandas does not
    # support and which can leave label lookups stale.
    duration = df[['id', 'T']].rename(columns={'T': 'value'})
    duration.insert(1, 'feature', 'duration')

    event_observed = df[['id', 'E']].rename(columns={'E': 'value'})
    event_observed.insert(1, 'feature', 'was_observed')
    # Only rows whose event flag equals 1 are passed on.
    event_observed = event_observed[event_observed['value'] == 1]

    categories = df[['id', 'group']].rename(columns={'group': 'value'})
    categories.insert(1, 'feature', 'group')

    results = self.task.main(durations=[duration],
                             categories=[categories],
                             event_observed=[event_observed],
                             estimator='NelsonAalen',
                             id_filter=[],
                             subsets=[])

    assert results['label'] == 'duration'
    assert len(results['categories']) == 2
    assert len(results['subsets']) == 1
    for group in ('control', 'miR-137'):
        group_stats = results['stats'][group][0]
        for key in ('timeline', 'estimate', 'ci_lower', 'ci_upper'):
            assert group_stats[key]
def test_log_rank_test_on_waltons_dataset():
    """Expect the log-rank p-value for miR-137 vs. non-miR-137 durations to be below 0.05."""
    frame = load_waltons()
    selector = frame["group"] == "miR-137"

    group_durations = frame.loc[selector, "T"]
    complement_durations = frame.loc[~selector, "T"]

    logrank = stats.logrank_test(group_durations, complement_durations)
    assert logrank.p_value < 0.05
def test_survival_difference_at_fixed_point_in_time_test():
    """Fixed-point-in-time test at 10, fed the raw duration Series of each group; expect p < 0.05."""
    waltons = load_waltons()
    is_mir137 = waltons["group"] == "miR-137"

    durations_mir137 = waltons.loc[is_mir137, "T"]
    durations_rest = waltons.loc[~is_mir137, "T"]

    outcome = stats.survival_difference_at_fixed_point_in_time_test(
        10, durations_mir137, durations_rest
    )
    assert outcome.p_value < 0.05
def test_survival_difference_at_fixed_point_in_time_test_parametric():
    """Compare two Weibull fits (miR-137 vs. the rest) at time 10; expect p < 0.05."""
    waltons = load_waltons()
    is_mir137 = waltons["group"] == "miR-137"
    mir137_rows = waltons.loc[is_mir137]
    other_rows = waltons.loc[~is_mir137]

    weibull_mir137 = WeibullFitter().fit(mir137_rows["T"], mir137_rows["E"])
    weibull_other = WeibullFitter().fit(other_rows["T"], other_rows["E"])

    outcome = stats.survival_difference_at_fixed_point_in_time_test(10, weibull_mir137, weibull_other)
    assert outcome.p_value < 0.05
def test_waltons_dataset():
    """Log-rank test between miR-137 rows and the other rows must be flagged significant."""
    data = load_waltons()
    mir137_mask = data['group'] == 'miR-137'

    mir137_T = data.loc[mir137_mask, 'T']
    remaining_T = data.loc[~mir137_mask, 'T']

    test_result = stats.logrank_test(mir137_T, remaining_T)
    assert test_result.is_significant
def test_group_survival_table_from_events_on_waltons_data():
    """Grouped survival tables for Waltons: two groups, expected columns, aligned indexes."""
    waltons = load_waltons()
    zero_first_obs = np.zeros(len(waltons))

    tables = utils.group_survival_table_from_events(
        waltons["group"], waltons["T"], waltons["E"], zero_first_obs
    )
    groups, removed, observed, censored = tables

    assert len(groups) == 2
    assert all(removed.columns == ["removed:miR-137", "removed:control"])
    # All three tables must share the same index.
    assert all(removed.index == observed.index)
    assert all(removed.index == censored.index)
def test_group_survival_table_from_events_on_waltons_data():
    """Check group count, ``removed`` column names and index alignment of the grouped tables."""
    data = load_waltons()
    n_rows = data.shape[0]
    first_observation = np.zeros(n_rows)

    unique_groups, removed_tbl, observed_tbl, censored_tbl = utils.group_survival_table_from_events(
        data['group'], data['T'], data['E'], first_observation
    )

    assert len(unique_groups) == 2
    assert all(removed_tbl.columns == ['removed:miR-137', 'removed:control'])
    assert all(removed_tbl.index == observed_tbl.index)
    assert all(removed_tbl.index == censored_tbl.index)
def test_rmst_plot_with_two_model(self, block):
    """Render ``rmst_plot`` comparing the control KM fit with the non-control KM fit at t=40.0."""
    data = load_waltons()
    is_control = data["group"] == "control"
    control_rows = data.loc[is_control]
    other_rows = data.loc[~is_control]

    control_fit = KaplanMeierFitter().fit(control_rows["T"], control_rows["E"], label="control")
    other_fit = KaplanMeierFitter().fit(other_rows["T"], other_rows["E"], label="exp")

    rmst_plot(control_fit, model2=other_fit, t=40.0)

    self.plt.title("test_rmst_plot_with_two_model")
    self.plt.show(block=block)
def test_logx_plotting(self, block):
    """Plot KM and Weibull survival curves of exponentiated durations on a log-scaled x axis."""
    data = load_waltons()

    km_fit = KaplanMeierFitter().fit(
        np.exp(data["T"]),
        data["E"],
        timeline=np.logspace(0, 40),
    )
    axes = km_fit.plot(logx=True)

    # The Weibull curve is drawn onto the same axes as the KM curve.
    weibull_fit = WeibullFitter().fit(
        np.exp(data["T"]),
        data["E"],
        timeline=np.logspace(0, 40),
    )
    weibull_fit.plot_survival_function(logx=True, ax=axes)

    self.plt.title("test_logx_plotting")
    self.plt.show(block=block)
def test_can_handle_nans(self):
    """Run the task on durations where every even-indexed row is NaN.

    The test passes as long as ``self.task.main`` does not raise; its
    return value is not inspected.
    """
    df = load_waltons()
    df.insert(0, 'id', df.index)

    # Use rename() rather than writing into ``columns.values``: the latter
    # mutates the backing array of an immutable Index, and the label-based
    # ``.loc[..., 'value']`` assignment below depends on the new label
    # being resolvable.
    duration = df[['id', 'T']].rename(columns={'T': 'value'})
    duration.insert(1, 'feature', 'duration')
    duration.loc[duration.index % 2 == 0, 'value'] = float('nan')

    self.task.main(durations=[duration],
                   categories=[],
                   event_observed=[],
                   estimator='KaplanMeier',
                   id_filter=[],
                   subsets=[])
def test_seaborn_doesnt_cause_kmf_plot_error(self, block, kmf, capsys):
    """With seaborn imported, plotting a KM fit must write nothing to stderr."""
    # Imported only for its side effects on matplotlib; the name is unused.
    import seaborn as sns

    data = load_waltons()
    durations, observed = data['T'], data['E']

    # Rebinds the ``kmf`` argument to a fresh fitter, as the test needs its own fit.
    kmf = KaplanMeierFitter()
    kmf.fit(durations, event_observed=observed)
    kmf.plot()

    self.plt.title('test_seaborn_doesnt_cause_kmf_plot_error')
    self.plt.show(block=block)

    captured = capsys.readouterr()
    stderr_text = captured[1]
    assert stderr_text == ""
def test_seaborn_doesnt_cause_kmf_plot_error(self, block, kmf, capsys):
    """Plot a Kaplan-Meier fit after importing seaborn and require empty stderr."""
    # The import itself is what is under test; ``sns`` is never referenced.
    import seaborn as sns

    waltons = load_waltons()

    # A new fitter replaces whatever was passed in as ``kmf``.
    kmf = KaplanMeierFitter()
    kmf.fit(waltons["T"], event_observed=waltons["E"])
    kmf.plot()

    pyplot = self.plt
    pyplot.title("test_seaborn_doesnt_cause_kmf_plot_error")
    pyplot.show(block=block)

    _out, error_output = capsys.readouterr()
    assert error_output == ""
def test_can_handle_empty_groups(self):
    """Each subset holds ids of one group only, so the other group's stats must be absent.

    ``subset1`` (index 0) contains only ``control`` ids and ``subset2``
    (index 1) only ``miR-137`` ids; the result must have no entry for
    ``miR-137`` in subset 0 and none for ``control`` in subset 1.
    """
    df = load_waltons()
    df.insert(0, 'id', df.index)

    subset1 = df[df['group'] == 'control']['id'].tolist()
    subset2 = df[df['group'] == 'miR-137']['id'].tolist()

    # Use rename() rather than writing into ``columns.values``: the latter
    # mutates the backing array of an immutable Index, which pandas does not
    # support and which can leave label lookups stale.
    duration = df[['id', 'T']].rename(columns={'T': 'value'})
    duration.insert(1, 'feature', 'duration')

    categories = df[['id', 'group']].rename(columns={'group': 'value'})
    categories.insert(1, 'feature', 'group')

    results = self.task.main(durations=[duration],
                             categories=[categories],
                             event_observed=[],
                             estimator='KaplanMeier',
                             id_filter=[],
                             subsets=[subset1, subset2])

    assert not results['stats']['miR-137'].get(0)
    assert not results['stats']['control'].get(1)
def test_correct_output_for_simple_input(self):
    """Run the task with durations only (Kaplan-Meier) and check the result layout.

    With no categories given, the stats are expected under the empty-string
    category key, subset 0.
    """
    df = load_waltons()
    df.insert(0, 'id', df.index)

    # Use rename() rather than writing into ``columns.values``: the latter
    # mutates the backing array of an immutable Index, which pandas does not
    # support and which can leave label lookups stale.
    duration = df[['id', 'T']].rename(columns={'T': 'value'})
    duration.insert(1, 'feature', 'duration')

    results = self.task.main(durations=[duration],
                             categories=[],
                             event_observed=[],
                             estimator='KaplanMeier',
                             id_filter=[],
                             subsets=[])

    assert results['label'] == 'duration'
    assert len(results['categories']) == 1
    assert len(results['subsets']) == 1
    overall_stats = results['stats'][''][0]
    for key in ('timeline', 'estimate', 'ci_lower', 'ci_upper'):
        assert overall_stats[key]
def test_log_rank_test_on_waltons_dataset_with_case_weights():
    """A weighted log-rank test on de-duplicated rows must match the unweighted one.

    Rows are collapsed by (T, E, group) and the group sizes are passed as
    case weights; the resulting summary must equal the plain summary.
    """
    waltons = load_waltons()
    is_mir137 = waltons["group"] == "miR-137"
    unweighted = stats.logrank_test(waltons.loc[is_mir137, "T"], waltons.loc[~is_mir137, "T"])
    print(unweighted)

    # One row per distinct (T, E, group) with its multiplicity in "weights".
    counts = waltons.groupby(["T", "E", "group"]).size().reset_index()
    collapsed = counts.rename(columns={0: "weights"})

    is_mir137_collapsed = collapsed["group"] == "miR-137"
    rows_a = collapsed.loc[is_mir137_collapsed]
    rows_b = collapsed.loc[~is_mir137_collapsed]

    weighted = stats.logrank_test(
        rows_a["T"],
        rows_b["T"],
        weights_A=rows_a["weights"],
        weights_B=rows_b["weights"],
    )
    assert_frame_equal(weighted.summary, unweighted.summary)
def get_sa(request):
    """Render ``sa_test.html`` with Kaplan-Meier and Nelson-Aalen plots of the Waltons data.

    The two plot images live under ``<parent of this file's directory>/images``.
    They are (re)generated whenever at least one of them is missing; the
    template context carries their relative paths under ``kmf`` and ``naf``.

    :param request: the incoming request, passed through to ``render``.
    :return: the response produced by ``render``.
    """
    dirname = os.path.dirname(os.path.dirname(__file__)).replace('\\', '/')
    kmffile = '/images/test1.jpg'
    naffile = '/images/test2.jpg'
    context = {'kmf': kmffile, 'naf': naffile}

    # Regenerate when EITHER image is missing. The previous ``and`` condition
    # only fired when both were absent, so a single deleted image was never
    # recreated and the template pointed at a non-existent file.
    both_present = os.path.exists(dirname + kmffile) and os.path.exists(dirname + naffile)
    if not both_present:
        df = load_waltons()
        T = df['T']  # an array of durations
        E = df['E']  # a either boolean or binary array representing whether the 'death' was observed (alternatively an individual can be censored)

        kmf = KaplanMeierFitter(alpha=0.95)
        kmf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='KM_estimate',
                alpha=None, left_censorship=False, ci_labels=None)

        naf = NelsonAalenFitter(alpha=0.95, nelson_aalen_smoothing=True)
        naf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='NA_estimate',
                alpha=None, ci_labels=None)

        # Close each figure once saved: pyplot keeps figures alive until they
        # are closed, so they would accumulate in a long-running server, and
        # the second plot could otherwise land on the first plot's axes.
        kmf.plot()
        plt.savefig(dirname + kmffile)
        plt.close()

        naf.plot()
        plt.savefig(dirname + naffile)
        plt.close()

    # return render_to_response(template_name='sa_test.html', context=context, context_instance=RequestContext(request=request))
    return render(request=request, template_name='sa_test.html', context=context)
def test_pairwise_waltons_dataset_is_significantly_different():
    """Pairwise log-rank on Waltons: the (control, miR-137) row must have p < 0.05."""
    data = load_waltons()
    pairwise = stats.pairwise_logrank_test(data["T"], data["group"])

    pair_row = pairwise.summary.loc[("control", "miR-137")]
    assert pair_row["p"] < 0.05
def test_pairwise_waltons_dataset_is_significantly_different():
    """Pairwise log-rank on Waltons: the result at position [0, 1] must be flagged significant."""
    data = load_waltons()
    pairwise = stats.pairwise_logrank_test(data['T'], data['group'])

    off_diagonal = pairwise.values[0, 1]
    assert off_diagonal.is_significant
# author: Thomas Haslwanter, date: Jun-2015 # Import standard packages import matplotlib.pyplot as plt import C2_8_mystyle as mystyle # additional packages from lifelines.datasets import load_waltons from lifelines import KaplanMeierFitter from lifelines.statistics import logrank_test # Set my favorite font mystyle.set() # Load and show the data df = load_waltons() # returns a Pandas DataFrame print(df.head()) ''' T E group 0 6 1 miR-137 1 13 1 miR-137 2 13 1 miR-137 3 13 1 miR-137 4 19 1 miR-137 ''' T = df['T'] E = df['E'] groups = df['group']
from lifelines import KaplanMeierFitter
from lifelines.datasets import load_waltons
import matplotlib.pyplot as plt

# Load the Waltons example data and pick the two columns passed to the fitter.
df = load_waltons()
T = df['T']
E = df['E']

# Fit a Kaplan-Meier estimator on T and E.
kmf = KaplanMeierFitter()
kmf.fit(T, E)

# Create a figure, plot the fitted estimate and save the result to disk.
# NOTE(review): assumes the 'plots/' directory already exists - confirm.
fig = plt.figure()
ax = kmf.plot()
plt.savefig('plots/KM_lifelines_test.png')
def waltons_dataset():
    """Return the Waltons dataset exactly as provided by ``load_waltons``."""
    dataset = load_waltons()
    return dataset
# """ Description: lifelines survival analysis example. """ #============================================================================== import pandas as pd import numpy as np import matplotlib.pyplot as plt from lifelines.datasets import load_waltons from lifelines import KaplanMeierFitter, NelsonAalenFitter plt.close('all') df = load_waltons() # returns a Pandas DataFrame print(df.head()) """ T E group 0 6 1 miR-137 1 13 1 miR-137 2 13 1 miR-137 3 13 1 miR-137 4 19 1 miR-137 """ T = df['T'] E = df['E'] # Fit the survival curve
def test_survival_table_from_events_binned_with_empty_bin():
    """The binned survival table for the miR-137 rows must not contain any null cell."""
    waltons = load_waltons()
    mir137_rows = waltons.loc[waltons["group"] == "miR-137"]

    bin_edges = [0, 10, 20, 30, 40, 50]
    binned_table = utils.survival_table_from_events(
        mir137_rows["T"], mir137_rows["E"], intervals=bin_edges
    )

    has_null = pd.isnull(binned_table).any().any()
    assert not has_null
def waltons():
    """Return the first 50 rows of the Waltons dataset, restricted to columns ``T`` and ``E``."""
    wanted_columns = ["T", "E"]
    trimmed = load_waltons()[wanted_columns]
    return trimmed.iloc[:50]
def test_pairwise_waltons_dataset_is_significantly_different():
    """Pairwise log-rank on Waltons: the result at position [0, 1] must have p < 0.05."""
    data = load_waltons()
    pairwise = stats.pairwise_logrank_test(data['T'], data['group'])

    off_diagonal = pairwise.values[0, 1]
    assert off_diagonal.p_value < 0.05
def test_pairwise_waltons_dataset_is_significantly_different():
    """Take the [0, 1] entry of the pairwise log-rank result and require p < 0.05."""
    frame = load_waltons()
    durations, group_labels = frame["T"], frame["group"]

    comparison = stats.pairwise_logrank_test(durations, group_labels)
    first_pair = comparison.values[0, 1]
    assert first_pair.p_value < 0.05
# Tutorial from: http://lifelines.readthedocs.org/en/latest/Quickstart.html # Import library from lifelines.datasets import load_waltons # Load data frame df = load_waltons() # Print dataframe print (df.head()) # Get separare frame for event and time T = df['T'] E = df['E'] from lifelines import KaplanMeierFitter kmf = KaplanMeierFitter() kmf.fit(T, event_observed=E) # more succiently, kmf.fit(T,E) kmf.survival_function_ kmf.median_ kmf.plot() # Multiple groups groups = df['group']