示例#1
0
def test_log_rank_test_on_waltons_dataset():
    """Log-rank test on the Waltons durations: miR-137 vs. the remaining rows.

    Only the ``T`` column of each group is handed to ``logrank_test``; the
    resulting p-value is expected to be below 0.05.
    """
    waltons = load_waltons()
    is_mir137 = waltons['group'] == 'miR-137'
    durations_mir137 = waltons.loc[is_mir137, 'T']
    durations_rest = waltons.loc[~is_mir137, 'T']
    outcome = stats.logrank_test(durations_mir137, durations_rest)
    assert outcome.p_value < 0.05
示例#2
0
def test_survival_difference_at_fixed_point_in_time_test_nonparametric():
    """Compare two Kaplan-Meier fits of the Waltons groups at time point 10.

    One fitter is fitted on the miR-137 rows and one on the remaining rows;
    the test's p-value is expected to be below 0.05.
    """
    waltons = load_waltons()
    is_mir137 = waltons["group"] == "miR-137"
    mir137_rows = waltons.loc[is_mir137]
    other_rows = waltons.loc[~is_mir137]

    kmf_mir137 = KaplanMeierFitter().fit(mir137_rows["T"], mir137_rows["E"])
    kmf_other = KaplanMeierFitter().fit(other_rows["T"], other_rows["E"])

    outcome = stats.survival_difference_at_fixed_point_in_time_test(10, kmf_mir137, kmf_other)
    assert outcome.p_value < 0.05
示例#3
0
def test_waltons_dataset():
    """The log-rank p-value between the two Waltons groups should be below 0.05."""
    data = load_waltons()
    mir137_mask = data['group'] == 'miR-137'
    # Split the duration column by group membership.
    t_mir137, t_rest = data.loc[mir137_mask, 'T'], data.loc[~mir137_mask, 'T']
    test_result = stats.logrank_test(t_mir137, t_rest)
    assert test_result.p_value < 0.05
示例#4
0
def test_survival_table_from_events_at_risk_column():
    """Check the ``at_risk`` column of the Waltons event table against reference values."""
    waltons = load_waltons()
    event_table = utils.survival_table_from_events(waltons['T'], waltons['E'])

    # Reference values, taken from R.
    expected = [163.0, 162.0, 160.0, 157.0, 154.0, 152.0, 151.0, 148.0, 144.0, 139.0, 134.0, 133.0, 130.0, 128.0, 126.0, 119.0, 118.0,
                108.0, 107.0, 99.0, 96.0, 89.0, 87.0, 69.0, 65.0, 49.0, 38.0, 36.0, 27.0, 24.0, 14.0, 1.0]

    # The first entry is skipped because it belongs to the birth time, 0.
    observed_at_risk = list(event_table['at_risk'][1:])
    assert observed_at_risk == expected
示例#5
0
    def test_rmst_plot_with_single_model(self, block):
        """Draw ``rmst_plot`` (t=40.0) for one Kaplan-Meier fit of the full Waltons data.

        ``block`` is forwarded to ``plt.show``.
        """
        data = load_waltons()
        fitted = KaplanMeierFitter().fit(data["T"], data["E"])
        rmst_plot(fitted, t=40.0)

        pyplot = self.plt
        pyplot.title("test_rmst_plot_with_single_model")
        pyplot.show(block=block)
示例#6
0
    def test_kmf_add_at_risk_counts_with_custom_subplot(self, block, kmf):
        """Plot control/exp KM curves with at-risk counts on every row of a 3x1 GridSpec.

        Scenario taken from
        https://github.com/CamDavidsonPilon/lifelines/issues/991#issuecomment-614427882
        ``block`` is forwarded to ``plt.show``; the ``kmf`` argument is not used.
        """
        import lifelines
        import matplotlib as mpl
        from lifelines.datasets import load_waltons

        plt = self.plt
        waltons = load_waltons()
        is_control = waltons["group"] == "control"
        control_rows = waltons.loc[is_control]
        exp_rows = waltons.loc[~is_control]

        n_rows = 3
        fig_height = 4 * n_rows
        half_inch = 0.5 / fig_height  # in percent height
        _fig = plt.figure(figsize=(6, fig_height), dpi=100)
        grid = mpl.gridspec.GridSpec(n_rows, 1)
        # plt.subplots_adjust(left=0.08, right=0.98, bottom=half_inch, top=1 - half_inch)

        for row in range(n_rows):
            ax = plt.subplot(grid[row, 0])

            # Fresh fitters for every subplot; both curves share the same axes.
            kmf_control = lifelines.KaplanMeierFitter()
            kmf_control.fit(control_rows["T"], control_rows["E"], label="control")
            ax = kmf_control.plot(ax=ax)

            kmf_exp = lifelines.KaplanMeierFitter()
            kmf_exp.fit(exp_rows["T"], exp_rows["E"], label="exp")
            ax = kmf_exp.plot(ax=ax)

            ax = lifelines.plotting.add_at_risk_counts(kmf_exp, kmf_control, ax=ax)

        plt.subplots_adjust(hspace=0.6)
        plt.title("test_kmf_add_at_risk_counts_with_custom_subplot")
        plt.show(block=block)
示例#7
0
def test_survival_table_from_events_at_risk_column():
    """The ``at_risk`` counts of the Waltons survival table must match the R reference."""
    # from R
    reference = [163.0, 162.0, 160.0, 157.0, 154.0, 152.0, 151.0, 148.0, 144.0, 139.0, 134.0, 133.0, 130.0, 128.0, 126.0, 119.0, 118.0,
                 108.0, 107.0, 99.0, 96.0, 89.0, 87.0, 69.0, 65.0, 49.0, 38.0, 36.0, 27.0, 24.0, 14.0, 1.0]

    raw = load_waltons()
    table = utils.survival_table_from_events(raw['T'], raw['E'])

    # Drop the leading entry: it is the birth time, 0.
    at_risk = table['at_risk'][1:]
    assert list(at_risk) == reference
示例#8
0
 def test_hide_ci_from_legend(self, block):
     """Plot a KM fit with ``ci_legend=False`` and then add a titled legend.

     ``block`` is forwarded to ``plt.show``.
     """
     data = load_waltons()
     fitted = KaplanMeierFitter().fit(data["T"], data["E"])

     axes = fitted.plot(ci_show=True, ci_only_lines=True, ci_legend=False)
     axes.legend(title="Legend title")

     pyplot = self.plt
     pyplot.title("test_hide_ci_from_legend")
     pyplot.show(block=block)
示例#9
0
 def test_correct_output_for_complex_input(self):
     """Run the task with durations, categories and observed events and check the result layout.

     Three frames with columns ``(id, feature, value)`` are derived from the
     Waltons data and passed to ``self.task.main`` with the ``NelsonAalen``
     estimator. The result must carry the label ``'duration'``, two
     categories, one subset, and non-empty ``timeline`` / ``estimate`` /
     ``ci_lower`` / ``ci_upper`` entries for both groups.
     """
     df = load_waltons()
     df.insert(0, 'id', df.index)

     # The value column is named via ``rename`` instead of writing into
     # ``columns.values[2]``: a pandas Index is immutable, and editing its
     # backing array in place is unsupported and fragile.
     duration = df[['id', 'T']].rename(columns={'T': 'value'})
     duration.insert(1, 'feature', 'duration')

     event_observed = df[['id', 'E']].rename(columns={'E': 'value'})
     event_observed.insert(1, 'feature', 'was_observed')
     # Only rows whose event flag equals 1 are passed on.
     event_observed = event_observed[event_observed['value'] == 1]

     categories = df[['id', 'group']].rename(columns={'group': 'value'})
     categories.insert(1, 'feature', 'group')

     results = self.task.main(durations=[duration],
                              categories=[categories],
                              event_observed=[event_observed],
                              estimator='NelsonAalen',
                              id_filter=[],
                              subsets=[])

     assert results['label'] == 'duration'
     assert len(results['categories']) == 2
     assert len(results['subsets']) == 1
     for group in ('control', 'miR-137'):
         first_subset_stats = results['stats'][group][0]
         for key in ('timeline', 'estimate', 'ci_lower', 'ci_upper'):
             assert first_subset_stats[key]
def test_log_rank_test_on_waltons_dataset():
    """The log-rank test between the miR-137 durations and all others yields p < 0.05."""
    waltons = load_waltons()
    in_mir137 = waltons["group"] == "miR-137"
    groups = {
        "mir137": waltons.loc[in_mir137, "T"],
        "rest": waltons.loc[~in_mir137, "T"],
    }
    outcome = stats.logrank_test(groups["mir137"], groups["rest"])
    assert outcome.p_value < 0.05
def test_survival_difference_at_fixed_point_in_time_test():
    """Fixed-point-in-time test (point 10) on the raw duration columns of both groups.

    The two ``T`` series are passed directly to the test; p < 0.05 is expected.
    """
    data = load_waltons()
    mir137_mask = data["group"] == "miR-137"
    durations_a = data.loc[mir137_mask, "T"]
    durations_b = data.loc[~mir137_mask, "T"]
    outcome = stats.survival_difference_at_fixed_point_in_time_test(10, durations_a, durations_b)
    assert outcome.p_value < 0.05
示例#12
0
def test_survival_difference_at_fixed_point_in_time_test_parametric():
    """Compare two Weibull fits of the Waltons groups at time point 10; expect p < 0.05."""
    waltons = load_waltons()
    is_mir137 = waltons["group"] == "miR-137"
    mir137_rows, other_rows = waltons.loc[is_mir137], waltons.loc[~is_mir137]

    weibull_mir137 = WeibullFitter().fit(mir137_rows["T"], mir137_rows["E"])
    weibull_other = WeibullFitter().fit(other_rows["T"], other_rows["E"])

    outcome = stats.survival_difference_at_fixed_point_in_time_test(10, weibull_mir137, weibull_other)
    assert outcome.p_value < 0.05
示例#13
0
def test_waltons_dataset():
    """The log-rank result for the two Waltons groups must report ``is_significant``."""
    data = load_waltons()
    mir137_mask = data['group'] == 'miR-137'
    t_mir137 = data.loc[mir137_mask, 'T']
    t_rest = data.loc[~mir137_mask, 'T']
    outcome = stats.logrank_test(t_mir137, t_rest)
    assert outcome.is_significant
示例#14
0
def test_group_survival_table_from_events_on_waltons_data():
    """Grouped survival tables for the Waltons data: two groups, aligned indexes.

    Every row is given a first-observation time of zero.
    """
    waltons = load_waltons()
    birth_times = np.zeros(waltons.shape[0])
    tables = utils.group_survival_table_from_events(waltons["group"], waltons["T"], waltons["E"], birth_times)
    group_labels, removed, observed, censored = tables

    assert len(group_labels) == 2
    assert all(removed.columns == ["removed:miR-137", "removed:control"])
    # The three tables must share one index.
    assert all(removed.index == observed.index)
    assert all(removed.index == censored.index)
示例#15
0
def test_group_survival_table_from_events_on_waltons_data():
    """Check group count, column names and index alignment of the grouped event tables."""
    data = load_waltons()
    n_rows = data.shape[0]
    # All rows start at time zero.
    first_obs = np.zeros(n_rows)
    labels, removed, observed, censored = utils.group_survival_table_from_events(
        data['group'], data['T'], data['E'], first_obs)

    assert len(labels) == 2
    assert all(removed.columns == ['removed:miR-137', 'removed:control'])
    for other in (observed, censored):
        assert all(removed.index == other.index)
示例#16
0
    def test_rmst_plot_with_two_model(self, block):
        """Draw ``rmst_plot`` (t=40.0) for the control fit with the exp fit as ``model2``.

        ``block`` is forwarded to ``plt.show``.
        """
        data = load_waltons()
        is_control = data["group"] == "control"
        control_rows, exp_rows = data.loc[is_control], data.loc[~is_control]

        kmf_con = KaplanMeierFitter().fit(control_rows["T"], control_rows["E"], label="control")
        kmf_exp = KaplanMeierFitter().fit(exp_rows["T"], exp_rows["E"], label="exp")
        rmst_plot(kmf_con, model2=kmf_exp, t=40.0)

        pyplot = self.plt
        pyplot.title("test_rmst_plot_with_two_model")
        pyplot.show(block=block)
示例#17
0
    def test_logx_plotting(self, block):
        """Overlay KM and Weibull survival curves of ``exp(T)``, both plotted with ``logx=True``.

        Each model is fitted on a ``np.logspace(0, 40)`` timeline; ``block`` is
        forwarded to ``plt.show``.
        """
        data = load_waltons()
        exp_durations = np.exp(data["T"])
        observed = data["E"]

        km_fit = KaplanMeierFitter().fit(exp_durations, observed, timeline=np.logspace(0, 40))
        axes = km_fit.plot(logx=True)

        # The Weibull curve is drawn on the axes returned by the KM plot.
        weibull_fit = WeibullFitter().fit(exp_durations, observed, timeline=np.logspace(0, 40))
        weibull_fit.plot_survival_function(logx=True, ax=axes)

        pyplot = self.plt
        pyplot.title("test_logx_plotting")
        pyplot.show(block=block)
示例#18
0
 def test_can_handle_nans(self):
     """Smoke test: the task must run when every even-indexed duration is NaN.

     No assertion is made on the result; the test passes if
     ``self.task.main`` returns without raising.
     """
     df = load_waltons()
     df.insert(0, 'id', df.index)

     # Name the value column via ``rename`` rather than by writing into
     # ``columns.values[2]``: a pandas Index is immutable, and editing its
     # backing array in place is unsupported and fragile.
     duration = df[['id', 'T']].rename(columns={'T': 'value'})
     duration.insert(1, 'feature', 'duration')

     # Blank out the duration of every row whose index label is even.
     duration.loc[duration.index % 2 == 0, 'value'] = float('nan')

     self.task.main(durations=[duration],
                    categories=[],
                    event_observed=[],
                    estimator='KaplanMeier',
                    id_filter=[],
                    subsets=[])
示例#19
0
    def test_seaborn_doesnt_cause_kmf_plot_error(self, block, kmf, capsys):
        """Plot a KM fit after importing seaborn and require that stderr stays empty.

        ``block`` is forwarded to ``plt.show``; the incoming ``kmf`` argument
        is not used.
        """
        # Deliberately unused: the test only needs seaborn to have been imported.
        import seaborn as sns

        data = load_waltons()
        durations, observed = data['T'], data['E']

        fitter = KaplanMeierFitter()
        fitter.fit(durations, event_observed=observed)
        fitter.plot()

        pyplot = self.plt
        pyplot.title('test_seaborn_doesnt_cause_kmf_plot_error')
        pyplot.show(block=block)

        # readouterr() yields (out, err); only the error stream is checked.
        captured_err = capsys.readouterr()[1]
        assert captured_err == ""
示例#20
0
    def test_seaborn_doesnt_cause_kmf_plot_error(self, block, kmf, capsys):
        """With seaborn imported, fitting and plotting a KM model must write nothing to stderr."""
        import seaborn as sns  # intentionally unused; only the import itself matters here

        waltons = load_waltons()

        # The ``kmf`` argument is replaced by a freshly constructed fitter.
        kmf = KaplanMeierFitter()
        kmf.fit(waltons["T"], event_observed=waltons["E"])
        kmf.plot()

        self.plt.title("test_seaborn_doesnt_cause_kmf_plot_error")
        self.plt.show(block=block)

        _out, stderr_text = capsys.readouterr()
        assert stderr_text == ""
示例#21
0
 def test_can_handle_empty_groups(self):
     """Subsets that contain no member of a category must yield no stats for it.

     Subset 0 holds the ids of the ``control`` rows and subset 1 the ids of
     the ``miR-137`` rows, so ``miR-137`` has no entry for subset 0 and
     ``control`` has none for subset 1.
     """
     df = load_waltons()
     df.insert(0, 'id', df.index)
     subset1 = df[df['group'] == 'control']['id'].tolist()
     subset2 = df[df['group'] == 'miR-137']['id'].tolist()

     # Name the value columns via ``rename`` rather than by writing into
     # ``columns.values[2]``: a pandas Index is immutable, and editing its
     # backing array in place is unsupported and fragile.
     duration = df[['id', 'T']].rename(columns={'T': 'value'})
     duration.insert(1, 'feature', 'duration')
     categories = df[['id', 'group']].rename(columns={'group': 'value'})
     categories.insert(1, 'feature', 'group')

     results = self.task.main(durations=[duration],
                              categories=[categories],
                              event_observed=[],
                              estimator='KaplanMeier',
                              id_filter=[],
                              subsets=[subset1, subset2])
     assert not results['stats']['miR-137'].get(0)
     assert not results['stats']['control'].get(1)
示例#22
0
 def test_correct_output_for_simple_input(self):
     """Run the task with durations only and check the layout of the result.

     Without categories or subsets the result must carry the label
     ``'duration'``, one category (keyed ``''``), one subset, and non-empty
     ``timeline`` / ``estimate`` / ``ci_lower`` / ``ci_upper`` entries.
     """
     df = load_waltons()
     df.insert(0, 'id', df.index)

     # Name the value column via ``rename`` rather than by writing into
     # ``columns.values[2]``: a pandas Index is immutable, and editing its
     # backing array in place is unsupported and fragile.
     duration = df[['id', 'T']].rename(columns={'T': 'value'})
     duration.insert(1, 'feature', 'duration')

     results = self.task.main(durations=[duration],
                              categories=[],
                              event_observed=[],
                              estimator='KaplanMeier',
                              id_filter=[],
                              subsets=[])

     assert results['label'] == 'duration'
     assert len(results['categories']) == 1
     assert len(results['subsets']) == 1
     only_stats = results['stats'][''][0]
     for key in ('timeline', 'estimate', 'ci_lower', 'ci_upper'):
         assert only_stats[key]
示例#23
0
def test_log_rank_test_on_waltons_dataset_with_case_weights():
    """A weighted log-rank test on aggregated rows must reproduce the unweighted summary.

    The Waltons rows are collapsed to unique ``(T, E, group)`` combinations
    whose counts serve as case weights; the summary of the weighted test is
    compared with that of the test on the raw rows.
    """
    df = load_waltons()
    is_mir137 = df["group"] == "miR-137"
    result = stats.logrank_test(df.loc[is_mir137, "T"], df.loc[~is_mir137, "T"])
    print(result)

    # One row per distinct (T, E, group) with its multiplicity as "weights".
    counts = df.groupby(["T", "E", "group"]).size()
    dfw = counts.reset_index().rename(columns={0: "weights"})
    is_mir137_w = dfw["group"] == "miR-137"
    weighted_a = dfw.loc[is_mir137_w]
    weighted_b = dfw.loc[~is_mir137_w]

    resultw = stats.logrank_test(weighted_a["T"],
                                 weighted_b["T"],
                                 weights_A=weighted_a["weights"],
                                 weights_B=weighted_b["weights"])
    assert_frame_equal(resultw.summary, result.summary)
示例#24
0
def get_sa(request):
    """Render ``sa_test.html`` with the paths of the Kaplan-Meier and Nelson-Aalen plots.

    The image paths are resolved relative to the parent of this file's
    directory. If either image is missing on disk, both estimators are fitted
    on the Waltons data and both plots are (re)written before rendering.

    Args:
        request: the request object handed through to ``render``.

    Returns:
        Whatever ``render`` returns for ``sa_test.html`` and the context
        ``{'kmf': ..., 'naf': ...}``.
    """
    dirname = os.path.dirname(os.path.dirname(__file__)).replace('\\', '/')
    kmffile = '/images/test1.jpg'
    naffile = '/images/test2.jpg'
    context = {}
    context['kmf'] = kmffile
    context['naf'] = naffile

    # Regenerate when EITHER image is missing. The previous condition required
    # both to be absent, so a single missing file was never recreated even
    # though the context references both images.
    if not os.path.exists(dirname + kmffile) or not os.path.exists(dirname + naffile):
        df = load_waltons()
        T = df['T']  # an array of durations
        E = df['E']  # a either boolean or binary array representing whether the 'death' was observed (alternatively an individual can be censored)
        kmf = KaplanMeierFitter(alpha=0.95)
        kmf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='KM_estimate', alpha=None, left_censorship=False, ci_labels=None)

        naf = NelsonAalenFitter(alpha=0.95, nelson_aalen_smoothing=True)
        naf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='NA_estimate', alpha=None, ci_labels=None)

        # NOTE(review): no new figure is created between the two plots, so the
        # second image may also contain the first curve — confirm intent.
        kmf.plot()
        plt.savefig(dirname + kmffile)
        naf.plot()
        plt.savefig(dirname + naffile)

    return render(request=request, template_name='sa_test.html', context=context)
def test_pairwise_waltons_dataset_is_significantly_different():
    """The pairwise log-rank summary row for (control, miR-137) must have p < 0.05."""
    data = load_waltons()
    pairwise = stats.pairwise_logrank_test(data["T"], data["group"])
    control_vs_mir137 = pairwise.summary.loc[("control", "miR-137")]
    assert control_vs_mir137["p"] < 0.05
示例#26
0
def test_pairwise_waltons_dataset_is_significantly_different():
    """Entry ``[0, 1]`` of the pairwise log-rank result must report ``is_significant``."""
    data = load_waltons()
    durations, group_labels = data['T'], data['group']
    pairwise = stats.pairwise_logrank_test(durations, group_labels)
    first_pair = pairwise.values[0, 1]
    assert first_pair.is_significant
# author: Thomas Haslwanter, date: Jun-2015

# Import standard packages
import matplotlib.pyplot as plt
import C2_8_mystyle as mystyle

# additional packages
from lifelines.datasets import load_waltons
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test

# Apply the preferred font settings
mystyle.set()

# Load the Waltons data (a pandas DataFrame) and print its first rows
df = load_waltons()
print(df.head())
'''
    T  E    group
0   6  1  miR-137
1  13  1  miR-137
2  13  1  miR-137
3  13  1  miR-137
4  19  1  miR-137
'''

# Pull out the columns used below
T, E = df['T'], df['E']
groups = df['group']
from lifelines import KaplanMeierFitter
from lifelines.datasets import load_waltons
import matplotlib.pyplot as plt

# Load the Waltons data and pull out the two columns passed to the fitter
df = load_waltons()
T, E = df['T'], df['E']

# Fit a Kaplan-Meier estimator on them
kmf = KaplanMeierFitter()
kmf.fit(T, E)

# Plot on a new figure and write it to disk
fig = plt.figure()
ax = kmf.plot()
plt.savefig('plots/KM_lifelines_test.png')
示例#29
0
def waltons_dataset():
    """Return the result of ``load_waltons()`` unchanged."""
    dataset = load_waltons()
    return dataset
示例#30
0
#
"""
  Description: lifelines survival analysis example.
"""
#==============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from lifelines.datasets import load_waltons
from lifelines import KaplanMeierFitter, NelsonAalenFitter

# Close any figures that are still open
plt.close('all')

# Load the Waltons data (a pandas DataFrame) and print its first rows
df = load_waltons()
print(df.head())
"""
    T  E    group
0   6  1  miR-137
1  13  1  miR-137
2  13  1  miR-137
3  13  1  miR-137
4  19  1  miR-137
"""

# Keep the T and E columns under short names
T, E = df['T'], df['E']

# Fit the survival curve
示例#31
0
def test_survival_table_from_events_binned_with_empty_bin():
    """A binned event table for the miR-137 rows must not contain any null values."""
    waltons = load_waltons()
    mir137_rows = waltons.loc[waltons["group"] == "miR-137"]
    bin_edges = [0, 10, 20, 30, 40, 50]
    event_table = utils.survival_table_from_events(
        mir137_rows["T"], mir137_rows["E"], intervals=bin_edges)
    has_null = pd.isnull(event_table).any().any()
    assert not has_null
示例#32
0
def waltons():
    """Return the first 50 rows of the ``T`` and ``E`` columns of the Waltons data."""
    duration_and_event = load_waltons()[["T", "E"]]
    return duration_and_event.iloc[:50]
示例#33
0
def waltons_dataset():
    """Load the Waltons data via ``load_waltons`` and hand it back as is."""
    loaded = load_waltons()
    return loaded
示例#34
0
def test_pairwise_waltons_dataset_is_significantly_different():
    """Entry ``[0, 1]`` of the pairwise log-rank result must have a p-value below 0.05."""
    data = load_waltons()
    pairwise = stats.pairwise_logrank_test(data['T'], data['group'])
    first_pair = pairwise.values[0, 1]
    assert first_pair.p_value < 0.05
示例#35
0
def test_pairwise_waltons_dataset_is_significantly_different():
    """Pairwise log-rank test over the Waltons groups: p-value at ``[0, 1]`` is below 0.05."""
    data = load_waltons()
    durations, group_labels = data["T"], data["group"]
    pairwise = stats.pairwise_logrank_test(durations, group_labels)
    p_value = pairwise.values[0, 1].p_value
    assert p_value < 0.05
示例#36
0
# Tutorial from: http://lifelines.readthedocs.org/en/latest/Quickstart.html

# Import library 
from lifelines.datasets import load_waltons

# Load the Waltons data frame and print its first rows
df = load_waltons()
print(df.head())

# Keep separate handles for the time (T) and event (E) columns
T, E = df['T'], df['E']


from lifelines import KaplanMeierFitter
# Fit the estimator; more succinctly: kmf.fit(T, E)
kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=E)

# Access the fitted results, then draw the curve
kmf.survival_function_
kmf.median_
kmf.plot()


# Multiple groups
groups = df['group']