示例#1
0
def survival_ll_nelson_aalen(content):
    """Fit a Nelson-Aalen estimator and return its curves as a wrapped JSON string.

    ``content`` is indexed with the keys ``'times'`` (passed as the durations)
    and ``'events'`` (passed as ``event_observed``). The fitted cumulative
    hazard and its confidence interval are converted with ``to_dict()`` and
    serialised under the keys ``'hazard'`` and ``'confidence'``; the JSON
    string is handed to ``httpWrapper`` and that call's result is returned.
    """
    fitter = NelsonAalenFitter()
    fitter.fit(content['times'], event_observed=content['events'])
    payload = {
        'hazard': fitter.cumulative_hazard_.to_dict(),
        'confidence': fitter.confidence_interval_.to_dict(),
    }
    # NOTE(review): ``ignore_nan`` is not a keyword of the stdlib json.dumps;
    # presumably ``json`` is simplejson in this module -- confirm.
    return httpWrapper(json.dumps(payload, ignore_nan=True))
示例#2
0
def concat_hazard_curve(T, C):
    """Fit an unsmoothed Nelson-Aalen estimator and return its curves on a fixed grid.

    Parameters
    ----------
    T
        Durations, passed straight to ``NelsonAalenFitter.fit``.
    C
        Event-observed flags, passed as ``event_observed``.

    Returns
    -------
    tuple
        ``(cumulative_hazard_values, confidence_interval_values)``, both
        reindexed to the integer grid ``1 .. args.max_idx`` (inclusive).
        ``args`` is read from the enclosing module.
    """
    naf = NelsonAalenFitter(nelson_aalen_smoothing=False)
    naf.fit(T, event_observed=C)
    # ``reindex`` expects the new index as a single argument. The previous
    # calls passed ``1`` and ``args.max_idx + 1`` as separate positionals, and
    # the second call also spelled ``args.max_idx`` as ``args, max_idx``.
    grid = range(1, args.max_idx + 1)
    return (naf.cumulative_hazard_.reindex(grid).values,
            naf.confidence_interval_.reindex(grid).values)
示例#3
0
def NelsonAelan_dash(T, C):
    """Fit a Nelson-Aalen estimator on (T, C), plot it twice and hand the figure to ``pyplot``.

    The estimate is drawn once with default settings and once with
    ``ci_force_lines=True``; the current matplotlib figure is then passed to
    ``pyplot`` with ``legend=False``.
    """
    plot_title = 'Nelson-Aalen Estimate'
    fitter = NelsonAalenFitter()
    fitter.fit(T, event_observed=C)
    fitter.plot(title=plot_title)
    fitter.plot(ci_force_lines=True, title=plot_title)
    current_figure = plt.gcf()
    pyplot(current_figure, legend=False)
示例#4
0
    def _vval2ByBootstrap(timeline, nstraps=1000):
        """Bootstrap the inverse standard deviation of the log-survival difference.

        For each of ``nstraps`` resamples (with replacement) of the enclosing
        ``df``, a Nelson-Aalen estimator is fitted separately to the rows with
        ``treatment_col == 0`` and ``treatment_col == 1``. Survival is taken as
        ``exp(-cumulative_hazard)`` and forward-filled onto ``timeline``.

        Parameters
        ----------
        timeline
            Index of time points; ``timeline.shape[0]`` sets the number of rows.
        nstraps
            Number of bootstrap resamples.

        Returns
        -------
        Array with one value per time point:
        ``1 / sqrt(nanvar(log S1) + nanvar(log S2))`` across resamples.
        """
        n_times = timeline.shape[0]
        sa1_b = np.zeros((n_times, nstraps))
        sa2_b = np.zeros((n_times, nstraps))
        for sampi in range(nstraps):
            tmp = df.sample(frac=1, replace=True, axis=0)
            for arm, out in ((0, sa1_b), (1, sa2_b)):
                # Both masks must come from the resampled frame. The arm-1
                # mask used to be built from ``df``, which only selected the
                # right rows through implicit index alignment against
                # ``tmp`` and was inconsistent with arm 0.
                ind = tmp[treatment_col] == arm
                naf = NelsonAalenFitter()
                naf.fit(durations=tmp.loc[ind, duration_col],
                        event_observed=tmp.loc[ind, event_col])
                sa = np.exp(-naf.cumulative_hazard_.iloc[:, 0])
                out[:, sampi] = sa.reindex(timeline, method='ffill').values
        vval2 = 1 / np.sqrt(
            np.nanvar(np.log(sa1_b), axis=1) +
            np.nanvar(np.log(sa2_b), axis=1))
        return vval2
示例#5
0
def calcSurvHazardCat(df: pd.DataFrame, *, hazardcol: str = "hazard",) -> pd.DataFrame:

    """
    Attach the Nelson-Aalen prediction at each patient's own time to the frame.

    The fitter is trained on the ``time`` and ``event`` columns and then asked
    to predict at the same ``time`` values; the predictions are written to
    ``hazardcol`` of the frame that was passed in.

    Parameters
    ----------
    df
        A data frame with two compulsory columns: time and event.
    hazardcol
        Column name for the survived hazard.

    Returns
    -------
    The input dataframe (modified in place), with an extra column of hazards.
    """

    durations = df["time"]
    observed = df["event"]

    # Fit the Nelson-Aalen estimator, then evaluate it at every patient's time.
    fitter = NelsonAalenFitter()
    fitter.fit(durations, observed)
    df[hazardcol] = fitter.predict(durations).tolist()
    return df
示例#6
0
 def test_naf_plot_cumulative_hazard_bandwith_1(self, block):
     """Visual smoke test: plot the smoothed hazard (bandwidth 5.0) of squared exponential draws."""
     samples = np.random.exponential(5, size=(2000, 1)) ** 2
     fitter = NelsonAalenFitter()
     fitter.fit(samples)
     # Only the first 1700 rows of the estimate are drawn.
     fitter.plot_hazard(bandwidth=5.0, iloc=slice(0, 1700))
     self.plt.title("test_naf_plot_cumulative_hazard_bandwith_1")
     self.plt.show(block=block)
示例#7
0
def createHazardGraph(durations, event_observed):
    """Fit a Nelson-Aalen estimator and show its plot without confidence bands."""
    fitter = NelsonAalenFitter()
    fitter.fit(durations, event_observed)
    fitter.plot(ci_show=False)

    # Decorate the current figure, then display it.
    plt.title("Hard Drive Nelson-Aalen Hazard Estimate")
    plt.ylabel("Cumulative Hazard")
    plt.show()
示例#8
0
 def test_naf_plot_cumulative_hazard(self, block):
     """Visual smoke test: draw the estimate via ``plot`` and ``plot_cumulative_hazard`` on one axes."""
     samples = np.random.exponential(5, size=(200, 1))
     fitter = NelsonAalenFitter()
     fitter.fit(samples)
     axes = fitter.plot()
     # Second pass reuses the same axes with the CI drawn as lines.
     fitter.plot_cumulative_hazard(ax=axes, ci_force_lines=True)
     self.plt.title("I should have plotted the same thing, but different styles + color!")
     self.plt.show(block=block)
示例#9
0
def survival_ll_nelson_aalen(content):
    """Fit a NelsonAalenFitter on ``content['times']`` / ``content['events']``
    and return ``httpWrapper(json.dumps(...))`` of a three-key dict
    (``'result'``, ``'hazard'``, ``'median'``).

    NOTE(review): the local is named ``kmf`` but holds a NelsonAalenFitter;
    the body looks copied from a Kaplan-Meier variant -- confirm which
    estimator (and which outputs) are intended before relying on this.
    """
    kmf = NelsonAalenFitter()
    kmf.fit(content['times'], event_observed=content['events'])
    return httpWrapper(
        json.dumps({
            # NOTE(review): read from a NelsonAalenFitter instance; verify this
            # attribute exists on that class and that it is JSON-serialisable.
            'result': kmf.survival_function_,
            # NOTE(review): bare name, not an attribute of ``kmf`` and not
            # assigned anywhere in this function -- likely meant
            # ``kmf.cumulative_hazard_``; confirm.
            'hazard': cumulative_hazard_,
            # NOTE(review): doubled attribute access (``kmf.kmf``) -- confirm
            # the intended expression.
            'median': kmf.kmf.median_
        }))
    def fit(
        self, durations, event_observed=None, timeline=None, entry=None, label=None, alpha=None, ci_labels=None, weights=None
    ):  # pylint: disable=too-many-arguments
        """
        Fit by delegating to a ``NelsonAalenFitter`` and exponentiating the
        negated cumulative hazard to obtain the survival function.

        Parameters
        ----------
        durations: an array, or pd.Series, of length n
            duration subject was observed for
        timeline:
            return the best estimate at the values in timelines (positively increasing)
        event_observed: an array, or pd.Series, of length n
            True if the the death was observed, False if the event was lost (right-censored). Defaults all True if event_observed==None
        entry: an array, or pd.Series, of length n
           relative time when a subject entered the study. This is
           useful for left-truncated observations, i.e the birth event was not observed.
           If None, defaults to all 0 (all birth events observed.)
        label: string
            a string to name the column of the estimate. Falls back to the
            label already stored on the instance, then to "BFH_estimate".
        alpha: float, optional (default=0.05)
            the alpha value in the confidence intervals. Overrides the initializing
           alpha for this call to fit only.
        ci_labels: iterable
            add custom column names to the generated confidence intervals as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>
        weights: optional
            forwarded unchanged to ``NelsonAalenFitter.fit``; the weights the
            inner fitter ends up with are stored on ``self.weights``.


        Returns
        -------
          self, with new properties like ``survival_function_``.

        """
        self._label = coalesce(label, self._label, "BFH_estimate")
        alpha = coalesce(alpha, self.alpha)

        naf = NelsonAalenFitter(alpha=alpha)
        # ``weights`` used to be accepted by this method but never passed on,
        # so caller-supplied weights were silently ignored.
        naf.fit(
            durations,
            event_observed=event_observed,
            timeline=timeline,
            label=self._label,
            entry=entry,
            ci_labels=ci_labels,
            weights=weights,
        )
        self.durations, self.event_observed, self.timeline, self.entry, self.event_table, self.weights = (
            naf.durations,
            naf.event_observed,
            naf.timeline,
            naf.entry,
            naf.event_table,
            naf.weights,
        )

        # estimation: S(t) = exp(-H(t)), applied to the estimate and its CI
        self.survival_function_ = np.exp(-naf.cumulative_hazard_)
        self.confidence_interval_ = np.exp(-naf.confidence_interval_)
        self.confidence_interval_survival_function_ = self.confidence_interval_
        self.confidence_interval_cumulative_density = 1 - self.confidence_interval_

        # estimation methods
        self._estimation_method = "survival_function_"
        self._estimate_name = "survival_function_"

        # plotting functions
        self.plot_survival_function = self.plot
        return self
示例#11
0
 def test_naf_plotting_with_custom_colours(self, block):
     """Visual smoke test: overlay two fits on one axes, drawn in red and black."""
     first_sample = np.random.exponential(5, size=(200, 1))
     second_sample = np.random.exponential(1, size=(500))
     fitter = NelsonAalenFitter()
     fitter.fit(first_sample)
     axes = fitter.plot(color="r")
     # Refit the same fitter on the second sample and draw onto the same axes.
     fitter.fit(second_sample)
     fitter.plot(ax=axes, color="k")
     self.plt.title("test_naf_plotting_with_custom_coloirs")
     self.plt.show(block=block)
示例#12
0
 def test_naf_plotting_slice(self, block):
     """Visual smoke test: plot one fit with a ``loc`` slice and a second with an ``iloc`` slice."""
     first_sample = np.random.exponential(5, size=(200, 1))
     second_sample = np.random.exponential(1, size=(200, 1))
     fitter = NelsonAalenFitter()
     fitter.fit(first_sample)
     axes = fitter.plot(loc=slice(0, None))
     # Refit on the second sample; draw rows 100..179 with CI lines forced.
     fitter.fit(second_sample)
     fitter.plot(ax=axes, ci_force_lines=True, iloc=slice(100, 180))
     self.plt.title("test_naf_plotting_slice")
     self.plt.show(block=block)
示例#13
0
    def _fit_kaplan_meier(self):
        """Fit the overall Kaplan-Meier and Nelson-Aalen estimators once and cache them.

        Does nothing when ``self.kmf_fit`` is already set. Both fitters use
        ``self.time``, ``self.event`` and ``self.label``; they are stored on
        the instance only after both fits have completed.
        """
        already_fitted = self.kmf_fit is not None
        if already_fitted:
            return

        km_estimator = KaplanMeierFitter()
        km_estimator.fit(self.time, event_observed=self.event, label=self.label)

        na_estimator = NelsonAalenFitter()
        na_estimator.fit(self.time, event_observed=self.event, label=self.label)

        self.kmf_fit = km_estimator
        self.naf_fit = na_estimator
示例#14
0
def go():
    """Fit an unsmoothed Nelson-Aalen estimator per gender group and dump each with dill.

    Reads the module-level ``args``, ``all_files``, ``files_m`` and
    ``files_f``. One file is written per group ('all', 'm', 'f'); the file
    name is built from the group and several ``args`` fields.
    """
    # Function-call form works on both Python 2 and 3 for a single argument.
    print(args)
    T_all, C_all = concat_TC(all_files)
    T_m, C_m = concat_TC(files_m)
    T_f, C_f = concat_TC(files_f)
    for gender, (T, C) in zip(('all', 'm', 'f'),
                              ((T_all, C_all), (T_m, C_m), (T_f, C_f))):
        naf = NelsonAalenFitter(nelson_aalen_smoothing=False)
        naf.fit(T, event_observed=C)
        out_path = (
            '/backup/home/jared/storage/foraging/cm/{}_{}_shuffle_{}_{}_{}'
            .format(gender, args.mode, args.min_length, args.ignore_first,
                    args.memory))
        # Context manager so the handle is flushed and closed even if
        # dill.dump raises; it was previously left open.
        with open(out_path, 'wb') as out_file:
            dill.dump(naf, out_file)
示例#15
0
 def _estimateSurv(df, ind):
     """Fit Nelson-Aalen on the rows selected by ``ind``; return the fitter, survival and a variance series.

     Returns ``(naf, sa, varsa)`` where ``sa`` is ``exp(-cumulative_hazard)``
     named ``'surv'`` and ``varsa`` is the cumulative sum of ``_additive_var``
     reindexed (pad) onto the sorted timeline and named ``'surv_var'``.
     """
     naf = NelsonAalenFitter()
     naf.fit(durations=df.loc[ind, duration_col], event_observed=df.loc[ind, event_col])

     # Variance computation borrowed from lifelines.
     events = naf.event_table
     ordered_times = sorted(naf.timeline)
     # Slowest line here.
     at_risk = events['entrance'].cumsum() - events['removed'].cumsum().shift(1).fillna(0)
     varsa = np.cumsum(_additive_var(at_risk, events['observed']))
     varsa = varsa.reindex(ordered_times, method='pad')
     varsa.index.name = 'timeline'
     varsa.name = 'surv_var'

     sa = np.exp(-naf.cumulative_hazard_.iloc[:, 0])
     sa.name = 'surv'
     return naf, sa, varsa
示例#16
0
 def _estimateSurv(df, ind):
     """Fit Nelson-Aalen on the rows selected by ``ind``; return the fitter, survival and a variance series.

     Returns ``(naf, sa, varsa)`` where ``sa`` is ``exp(-cumulative_hazard)``
     named ``'surv'`` and ``varsa`` is the cumulative sum of ``_additive_var``
     reindexed (pad) onto the sorted timeline and named ``'surv_var'``.
     """
     naf = NelsonAalenFitter()
     naf.fit(durations=df.loc[ind, duration_col], event_observed=df.loc[ind, event_col])

     # Variance computation borrowed from lifelines.
     events = naf.event_table
     ordered_times = sorted(naf.timeline)
     # Slowest line here.
     at_risk = events['entrance'].cumsum() - events['removed'].cumsum().shift(1).fillna(0)
     varsa = np.cumsum(_additive_var(at_risk, events['observed']))
     varsa = varsa.reindex(ordered_times, method='pad')
     varsa.index.name = 'timeline'
     varsa.name = 'surv_var'

     sa = np.exp(-naf.cumulative_hazard_.iloc[:, 0])
     sa.name = 'surv'
     return naf, sa, varsa
示例#17
0
def get_hazard_ratio_results(df, group_col, time_col, event_col):
    """Fit one Nelson-Aalen estimator per group and return the fitted models.

    Parameters
    ----------
    df
        Frame containing ``event_col``, ``time_col`` and ``group_col``.
        Rows with a missing value in any of the three are dropped.
    group_col
        Column whose distinct values define the groups.
    time_col
        Duration column; cast to float.
    event_col
        Event column; converted to categorical codes before fitting.

    Returns
    -------
    list
        One fitted ``NelsonAalenFitter`` per group, labelled
        ``"<group> (N=<rows>)"``. Empty when no rows survive the dropna.
    """
    models = []
    # Explicit copy: the column assignments below must not target a view of
    # the caller's frame (and must not trigger SettingWithCopyWarning).
    df = df[[event_col, time_col, group_col]].dropna().copy()
    df[event_col] = df[event_col].astype('category').cat.codes
    df[time_col] = df[time_col].astype('float')
    if df.empty:
        return models

    for name, grouped_df in df.groupby(group_col):
        t = grouped_df[time_col]
        e = grouped_df[event_col]
        hr = NelsonAalenFitter()
        # format() instead of ``+`` so non-string group keys (ints, bools)
        # no longer raise TypeError; string keys produce the same label.
        hr.fit(t, event_observed=e, label="{} (N={})".format(name, len(t)))
        models.append(hr)

    return models
示例#18
0
    def _vval2ByBootstrap(timeline, nstraps=1000):
        """Bootstrap the inverse standard deviation of the log-survival difference.

        For each of ``nstraps`` resamples (with replacement) of the enclosing
        ``df``, a Nelson-Aalen estimator is fitted separately to the rows with
        ``treatment_col == 0`` and ``treatment_col == 1``. Survival is taken as
        ``exp(-cumulative_hazard)`` and forward-filled onto ``timeline``.

        Parameters
        ----------
        timeline
            Index of time points; ``timeline.shape[0]`` sets the number of rows.
        nstraps
            Number of bootstrap resamples.

        Returns
        -------
        Array with one value per time point:
        ``1 / sqrt(nanvar(log S1) + nanvar(log S2))`` across resamples.
        """
        n_times = timeline.shape[0]
        sa1_b = np.zeros((n_times, nstraps))
        sa2_b = np.zeros((n_times, nstraps))
        for sampi in range(nstraps):
            tmp = df.sample(frac=1, replace=True, axis=0)
            for arm, out in ((0, sa1_b), (1, sa2_b)):
                # Both masks must come from the resampled frame. The arm-1
                # mask used to be built from ``df``, which only selected the
                # right rows through implicit index alignment against
                # ``tmp`` and was inconsistent with arm 0.
                ind = tmp[treatment_col] == arm
                naf = NelsonAalenFitter()
                naf.fit(durations=tmp.loc[ind, duration_col], event_observed=tmp.loc[ind, event_col])
                sa = np.exp(-naf.cumulative_hazard_.iloc[:, 0])
                out[:, sampi] = sa.reindex(timeline, method='ffill').values
        vval2 = 1/np.sqrt(np.nanvar(np.log(sa1_b), axis=1) + np.nanvar(np.log(sa2_b), axis=1))
        return vval2
示例#19
0
def plot_HR(df, with_ci=False):
    """Plot Nelson-Aalen curves for the top risk quartile versus the rest and save a PNG.

    Rows whose ``risk`` exceeds the 75th percentile form the 'High_Risk'
    group; all others form 'Low_Risk'. The figure is written to
    ``./hr_with_ci.png`` or ``./hr_without_ci.png`` depending on ``with_ci``.
    """
    durations = df['days_survived']
    died = df['death']
    fitter = NelsonAalenFitter()

    threshold = np.percentile(df['risk'], 75)
    is_high = df['risk'] > threshold

    # Same fitter object is refitted for the second group and drawn on the
    # axes returned by the first plot.
    fitter.fit(durations[is_high], event_observed=died[is_high], label='High_Risk')
    axes = fitter.plot(ci_show=with_ci)
    fitter.fit(durations[~is_high], event_observed=died[~is_high], label='Low_Risk')
    fitter.plot(ax=axes, ci_show=with_ci)

    plt.ylim(0, .1)
    plt.xlabel("Days")
    plt.ylabel("Risk of Death")
    plt.title("Cardiovascular Death Risk over time (top quartile)")
    target = "./hr_with_ci.png" if with_ci else "./hr_without_ci.png"
    plt.savefig(target)
示例#20
0
def get_sa(request):
    """Render ``sa_test.html``, generating the KM and NA plot images when missing.

    The two image paths are placed in the template context under ``'kmf'``
    and ``'naf'``. The images live under the parent directory of this
    module's directory and are built from the Waltons dataset.
    """
    dirname = os.path.dirname(os.path.dirname(__file__)).replace('\\', '/')
    kmffile = '/images/test1.jpg'
    naffile = '/images/test2.jpg'
    context = {}
    context['kmf'] = kmffile
    context['naf'] = naffile
    # Regenerate when EITHER image is missing. The previous ``and`` skipped
    # regeneration if only one file existed, leaving the template pointing at
    # a missing image.
    if not os.path.exists(dirname + kmffile) or not os.path.exists(dirname + naffile):
        df = load_waltons()
        T = df['T']  # an array of durations
        E = df['E']  # a either boolean or binary array representing whether the 'death' was observed (alternatively an individual can be censored)
        kmf = KaplanMeierFitter(alpha=0.95)
        kmf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='KM_estimate', alpha=None, left_censorship=False, ci_labels=None)

        naf = NelsonAalenFitter(alpha=0.95, nelson_aalen_smoothing=True)
        naf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='NA_estimate', alpha=None, ci_labels=None)

        # One explicit figure per image so the second plot cannot land on the
        # first plot's axes, and so figures are released after saving.
        for fitter, relpath in ((kmf, kmffile), (naf, naffile)):
            fig = plt.figure()
            fitter.plot(ax=fig.add_subplot(111))
            fig.savefig(dirname + relpath)
            plt.close(fig)

    # return render_to_response(template_name='sa_test.html', context=context, context_instance=RequestContext(request=request))
    return render(request=request, template_name='sa_test.html', context=context)
示例#21
0
                data_events = np.append(data_events,np.array([time_to_event]*num_repair))
            for v in sales_dict.values():
                #investigate why some negative leftovers on certain valid dates , more repairs than sales ???
                if v>0:
                    data_events = np.append(data_events,np.zeros(v))

            t=[]
            if len(data_events)==0:
                all_data.append([0]*19)
                continue

            data_events[data_events==0] = 160
            C= data_events <160
            naf = NelsonAalenFitter()
            naf.fit(data_events, censorship=C )

            y_h =  np.array(naf.cumulative_hazard_).reshape(len(naf.cumulative_hazard_))
            x= np.array(naf.cumulative_hazard_.index).astype(int)

            seen_data_events.add(0)
            seen_data_events.add(160)

            if len(y_h) > 14:
                slope, intercept, r_value, p_value, std_err = stats.linregress(x[len(x)-5:len(x)-1],y_h[len(y_h)-5:len(y_h)-1])
                #plt.figure()
                #plt.plot(x, y_h, 'ko')
                #plt.plot(x, linear_f(x,slope,intercept ), 'r-')

                #plt.legend()
                #plt.show()
示例#22
0
term_bandwidths = [4., 8.] #list of NAF smoothing bandwidth (for each term)
naf = NelsonAalenFitter(nelson_aalen_smoothing=False) #init NAF model

all_hazards = {} #initialize dict to store hazard functions
for idx,term in enumerate(keep_terms): #compute all hazard functions for each term
    
    cur_data = LD[LD.term==term]
    lifetimes = cur_data['num_pymnts'].copy() #lifetime is number of payments received
    # .loc replaces the .ix indexer, which has been removed from pandas; with a boolean mask the two select the same rows
    lifetimes.loc[cur_data.loan_status == 'Fully Paid'] = term #if the loan is fully paid set the lifetime to the full term
    is_observed = cur_data.loan_status.isin(['Charged Off']) #observed loans are just the ones that have been charged off, rest are censored   
    
    all_hazards[term] = np.zeros((len(keep_grades),term+1)) #initialize matrix of hazard functions

    for gidx,grade in enumerate(keep_grades): #fit model for each grade
        grade_data = cur_data.grade == grade
        naf.fit(lifetimes[grade_data],event_observed=is_observed[grade_data],label=grade,timeline=np.arange(term+1))
        all_hazards[term][gidx,:] = naf.smoothed_hazard_(term_bandwidths[idx]).squeeze()
        
#%%
terms = LD.term.unique() #set of unique loan terms
for term in terms: #for each possible loan term  
    #get relevant set of loans
    cur_loans = LD.term == term 
    cur_LD = LD[cur_loans]
    
    (NAR, net_returns, p_csum) = LCH.get_NARs(cur_LD, term)
    LD.loc[cur_loans,'ROI'] = NAR #measured performance of each loan
    LD.loc[cur_loans,'net_returns'] = net_returns #second value returned by LCH.get_NARs
    LD.loc[cur_loans,'prnc_weight'] = p_csum #third value returned by LCH.get_NARs
    LD.loc[cur_loans,'default_prob'] = LD.loc[cur_loans,'is_observed'].astype(float) #is_observed column cast to float
示例#23
0
def test_exponential_data_sets_fit():
    """Visual check: fit Nelson-Aalen on 20000 generated exponential observations and plot it."""
    sample_size = 20000
    durations, observed = exponential_survival_data(sample_size, 0.2, scale=10)
    fitter = NelsonAalenFitter()
    fitter.fit(durations, observed).plot()
    plt.title("Should be a linear with slope = 0.1")
示例#24
0
    plt.title(dept)
    plt.xlim(0, 1000)
    if i == 0:
        plt.ylabel('Frac. in staying after $n$ years')
plt.tight_layout()

for i, dept in enumerate(depts):
    ix = data['dept'] == dept
    kmf.fit(T[ix], E[ix], label=dept)
    print(dept, kmf.median_)

# Looking at a hazard curve
from lifelines import NelsonAalenFitter
naf = NelsonAalenFitter()

naf.fit(T, event_observed=E)
print(naf.cumulative_hazard_.head())
naf.plot()

# This hazard curve shows us that there is low hazard of someone leaving starting off, then it gets worse,
# once you stay for 500 days you stay at least a bit more, then exponentially it gets worse!

# SURVIVAL REGRESSION -- figuring out the influences of other aspects on whether or not someone survives
# Can't use regular linear regression. Want to use Cox's model or Aalen's additive model.

# Cox's Proportional Hazard model
# "The idea behind the model is that the log-hazard of an individual is a linear function of their static covariates
# and a population-level baseline hazard that changes over time" - from https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html

from lifelines.datasets import load_rossi
from lifelines import CoxPHFitter
示例#25
0
     for r in cac_ranges:
         ix = cac_values == r
         if first == 0:
             kmf.fit(times[ix], censors[ix], label=r)
             ax = kmf.plot()
             first = 1
         else:
             kmf.fit(times[ix], censors[ix], label=r) 
             kmf.plot(ax=ax)
 
 elif curve == 'hazard':
     # Plot hazard curve
     naf = NelsonAalenFitter() 
     first = 0
     for r in cac_ranges:
         ix = cac_values == r
         if first == 0:
             naf.fit(times[ix], censors[ix], label=r)
             ax = naf.plot()
             first = 1
         else:
             naf.fit(times[ix], censors[ix], label=r) 
             naf.plot(ax=ax)            
                 
    
 ax.set_ylabel("%", fontsize=12)    
 ax.set_title(tag, fontsize=14)
 ax.set_xlabel("Years to event", fontsize=12)
 
 return times            
 
示例#26
0
# Colon dataset: plot one Nelson-Aalen cumulative-hazard curve per age
# quartile and save the figure as a PDF.
if data == 'colon':
    # ``data`` is rebound from the dataset name to the loaded frame.
    data = pd.read_csv('../data/colon')
    data = data[data.etype == 2]
    # Split age into four quantile-based bands.
    data['age_band'] = pd.qcut(data.age, 4)
    print(data.head())
    age_bands = data.age_band.unique().sort_values()
    print('age bands', age_bands)
    ax = plt.subplot()

    for i in range(4):
        mask = data.age_band == age_bands[i]
        print('num individuals in age band', age_bands[i], 'equals', np.sum(mask))
        naf = NelsonAalenFitter()

        # The label sets the column name read back from cumulative_hazard_ below.
        fitted = naf.fit(data.loc[mask, 'time'], data.loc[mask, 'status'],
                         label='cum_hazard')
        cum_hazard_df = fitted.cumulative_hazard_

        cum_hazard = cum_hazard_df['cum_hazard'].to_numpy()
        times = cum_hazard_df.index.to_numpy()
        # NOTE(review): this rebinds ``ax`` (the subplot created above) to the
        # return value of plt.plot; ``linestyles`` is defined outside this block.
        ax = plt.plot(times, cum_hazard, label='Q' + str(i+1), linestyle=linestyles[i])
        print(f'i plus 1 is {i+1}, and age band {age_bands[i]}')
    plt.legend()
    plt.xlabel('Time (in days)')
    plt.ylabel('Cumulative hazard')
    plt.tight_layout()
    plt.savefig('cumulative_hazard_colon.pdf')
    plt.show()

#     #
#     # loan_bands = data['loan_band'].unique()
示例#27
0
文件: main.py 项目: thehyve/Fractalis
    def main(self, durations: List[pd.DataFrame],
             categories: List[pd.DataFrame],
             event_observed: List[pd.DataFrame],
             estimator: str,
             id_filter: List[str],
             subsets: List[List[str]]) -> dict:
        """Estimate one curve per (category, subset) combination.

        :param durations: List that must contain exactly one frame; its
            'value' column holds the durations.
        :param categories: Frames handed to ``utils.apply_categories``.
        :param event_observed: Empty, or a single frame. A duration row counts
            as observed when the right-merge on 'id' yields a non-null value.
        :param estimator: 'NelsonAalen' or 'KaplanMeier'.
        :param id_filter: Ids handed to ``utils.apply_id_filter``.
        :param subsets: Subsets handed to ``utils.apply_subsets``.
        :return: Dict with 'label', 'categories', 'subsets' and 'stats', where
            ``stats[category][subset]`` holds 'timeline', 'estimate',
            'ci_lower' and 'ci_upper' lists. Groups with three or fewer
            durations are skipped.
        :raises ValueError: Wrong number of duration / event_observed frames,
            or an unknown estimator name.
        """
        # logger.error (not logger.exception) throughout: these messages are
        # logged outside any ``except`` block, where logger.exception would
        # append a meaningless "NoneType: None" traceback.
        if len(durations) != 1:
            error = 'Analysis requires exactly one array that specifies the ' \
                    'duration length.'
            logger.error(error)
            raise ValueError(error)
        if len(event_observed) > 1:
            error = 'Maximal one variable for "event_observed" allowed'
            logger.error(error)
            raise ValueError(error)

        df = durations[0]
        df.dropna(inplace=True)
        df = utils.apply_id_filter(df=df, id_filter=id_filter)
        df = utils.apply_subsets(df=df, subsets=subsets)
        df = utils.apply_categories(df=df, categories=categories)

        stats = {}
        categories = df['category'].unique().tolist()
        subsets = df['subset'].unique().tolist()
        # for every category and subset combination estimate the survival fun.
        for category in categories:
            for subset in subsets:
                sub_df = df[(df['category'] == category) &
                            (df['subset'] == subset)]
                T = sub_df['value']
                E = None  # default is nothing is censored
                if len(T) <= 3:
                    continue
                if event_observed:
                    # find observation boolean value for every duration
                    E = event_observed[0].merge(sub_df, how='right', on='id')
                    E = [not x for x in pd.isnull(E['value_x'])]
                    assert len(E) == len(T)
                # NOTE(review): the confidence-interval column names below are
                # hard-coded; confirm they match the installed lifelines version.
                if estimator == 'NelsonAalen':
                    fitter = NelsonAalenFitter()
                    fitter.fit(durations=T, event_observed=E)
                    estimate = fitter.cumulative_hazard_[
                        'NA_estimate'].tolist()
                    ci_lower = fitter.confidence_interval_[
                        'NA_estimate_lower_0.95'].tolist()
                    ci_upper = fitter.confidence_interval_[
                        'NA_estimate_upper_0.95'].tolist()
                elif estimator == 'KaplanMeier':
                    fitter = KaplanMeierFitter()
                    fitter.fit(durations=T, event_observed=E)
                    # noinspection PyUnresolvedReferences
                    estimate = fitter.survival_function_[
                        'KM_estimate'].tolist()
                    ci_lower = fitter.confidence_interval_[
                        'KM_estimate_lower_0.95'].tolist()
                    ci_upper = fitter.confidence_interval_[
                        'KM_estimate_upper_0.95'].tolist()
                else:
                    error = 'Unknown estimator: {}'.format(estimator)
                    logger.error(error)
                    raise ValueError(error)
                timeline = fitter.timeline.tolist()
                stats.setdefault(category, {})[subset] = {
                    'timeline': timeline,
                    'estimate': estimate,
                    'ci_lower': ci_lower,
                    'ci_upper': ci_upper
                }

        return {
            'label': df['feature'].tolist()[0],
            'categories': categories,
            'subsets': subsets,
            'stats': stats
        }
    def fit(
        self,
        durations,
        event_observed=None,
        timeline=None,
        entry=None,
        label="BFH_estimate",
        alpha=None,
        ci_labels=None,
    ):  # pylint: disable=too-many-arguments
        """
        Fit via an inner Nelson-Aalen estimator and expose ``exp(-H(t))`` as
        the survival function.

        Parameters
        ----------
        durations: an array, or pd.Series, of length n
            how long each subject was observed
        timeline:
            values at which the best estimate is returned (positively increasing)
        event_observed: an array, or pd.Series, of length n
            True when the death was observed, False when it was lost
            (right-censored). All True if event_observed==None
        entry: an array, or pd.Series, of length n
            relative time at which a subject entered the study; useful for
            left-truncated observations. If None, all entries are 0.
        label: string
            name for the column of the estimate.
        alpha: float, optional (default=0.05)
            alpha for the confidence intervals; overrides the instance's
            alpha for this call only.
        ci_labels: iterable
            custom confidence-interval column names as a length-2 list:
            [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>

        Returns
        -------
          self, with new properties like ``survival_function_``.

        """
        self._label = label
        effective_alpha = coalesce(alpha, self.alpha)

        inner = NelsonAalenFitter(alpha=effective_alpha)
        inner.fit(
            durations,
            event_observed=event_observed,
            timeline=timeline,
            label=label,
            entry=entry,
            ci_labels=ci_labels,
        )

        # Mirror the inner fitter's processed inputs on this instance.
        self.durations = inner.durations
        self.event_observed = inner.event_observed
        self.timeline = inner.timeline
        self.entry = inner.entry
        self.event_table = inner.event_table

        # Survival estimate and its interval: exponentiate the negated hazard.
        self.survival_function_ = np.exp(-inner.cumulative_hazard_)
        self.confidence_interval_ = np.exp(-inner.confidence_interval_)

        # Names used by the shared estimation machinery.
        self._estimation_method = "survival_function_"
        self._estimate_name = "survival_function_"
        self._update_docstrings()

        # Alias the generic plot as the survival-function plot.
        self.plot_survival_function = self.plot
        return self
示例#29
0
plt.ylim(0, 1)
plt.title("Lifespans of different Question types in First 500 Days")

# Test of significances between Question Types
from lifelines.statistics import logrank_test

results = logrank_test(T[short], T[~short], E[short], E[~short], alpha=.99)

results.print_summary()

# Applying output to a hazard curve.
from lifelines import NelsonAalenFitter
naf = NelsonAalenFitter()

naf.fit(T, event_observed=E)
naf.plot()

#By question length
naf.fit(T[short], event_observed=E[short], label="Shorter Questions")
ax = naf.plot(loc=slice(0, 200))
naf.fit(T[~short], event_observed=E[~short], label="Longer Questions")
naf.plot(ax=ax, loc=slice(0, 200))
plt.title("Cumulative hazard function by Question Length (up to 2000= days)")

# Aalen's Additive Model
from lifelines import CoxPHFitter
cph = CoxPHFitter()

#Covariance matrix
import patsy
示例#30
0
                data_events = np.append(data_events,np.array([time_to_event]*num_repair))
            for v in sales_dict.values():
                #investigate why some negative leftovers on certain valid dates , more repairs than sales ???
                if v>0:
                    data_events = np.append(data_events,np.zeros(v))

            t=[]
            if len(data_events)==0:
                all_data.append([0]*19)
                continue

            data_events[data_events==0] = 70
            C= data_events <70
            naf = NelsonAalenFitter()
            naf.fit(data_events, event_observed=C )
            y_h =  np.array(naf.cumulative_hazard_).reshape(len(naf.cumulative_hazard_))
            x= np.array(naf.cumulative_hazard_.index).astype(int)

            seen_data_events.add(0)
            seen_data_events.add(70)

            if len(y_h) > 14:
                slope, intercept, r_value, p_value, std_err = stats.linregress(x[len(x)-5:len(x)-1],y_h[len(y_h)-5:len(y_h)-1])
                
                #plt.figure()
                #plt.plot(x, y_h, 'ko')
                #plt.plot(x, linear_f(x,slope,intercept ), 'r-')

                #plt.legend()
                #plt.show()
示例#31
0
    ~module_survival_data['module_name'].
    isin(['Pre-CLIx_Survey', 'Post-CLIx_Survey'])]
module_survival_data['event'] = 1

groups = module_survival_data['module_name']
T = module_survival_data['duration_weeks']
E = module_survival_data['event']

from lifelines import NelsonAalenFitter
naf = NelsonAalenFitter()
bandwidth = 3.

for i, each in enumerate(list(module_survival_data['module_name'].unique())):
    ix = (groups == each)

    naf.fit(T[ix], event_observed=E[ix], label=each)
    if i == 0:
        ax = naf.plot_hazard(bandwidth=bandwidth, ci_show=False)
    else:
        ax = naf.plot_hazard(ax=ax, bandwidth=bandwidth, ci_show=False)
ax.set_title("Hazard function of different modules | bandwidth=%.1f" %
             bandwidth)

# Survival curves for tools
import pandas
from datetime import datetime, timedelta
from lifelines import KaplanMeierFitter

data_path = '/home/parthae/Documents/Projects/TISS_Git/projects/data_collation/data/data_latest'
cg_data = pandas.read_csv(
    data_path +
plt.show()

ax = plt.subplot(111)
for r in data['Has_Children'].unique():
    ix = data['Has_Children'] == r
    kmf.fit(data['Duration'].loc[ix], data['Divorce'].loc[ix], label=r)
    sns.set()
    ax = kmf.plot(title='Mariage Survival Estimate Based on Children',
                  ax=ax,
                  linewidth=2.5)
#Export the figure
plt.savefig('/home/raed/Dropbox/INSE - 6320/Final Project/Children.pdf')
plt.show()

naf = NelsonAalenFitter()
naf.fit(data['Duration'], data['Divorce'])
sns.set()
naf.plot(title='Cumulative hazard over time', legend=False)
print(naf.cumulative_hazard_.head(32))
plt.savefig(
    '/home/raed/Dropbox/INSE - 6320/Final Project/Cumulative_Hazard_function.pdf'
)
plt.show()

ax = plt.subplot(111)
for r in data['Couple_Race'].unique():
    ix = data['Couple_Race'] == r
    naf.fit(data['Duration'].loc[ix], data['Divorce'].loc[ix], label=r)
    sns.set()
    ax = naf.plot(title='Cumulative Hazard by Couple Race ',
                  ax=ax,
示例#33
0
from lifelines import WeibullFitter

wf = WeibullFitter()
wf.fit(T, E)
print(wf.lambda_, wf.rho_)
wf.print_summary()
wf.plot()

############################################################
# NelsonAalenFitter
############################################################
from lifelines import NelsonAalenFitter

naf = NelsonAalenFitter()

naf.fit(T, event_observed=E)
naf.plot()

# univariate analysis: cum hazard
# ORIG_CHN
ax = plt.subplot()
for chn in df_cox.ORIG_CHN.unique():
    is_chn = (df_cox.ORIG_CHN == chn)
    naf.fit(T[is_chn], event_observed=E[is_chn], label=chn)
    naf.plot(ax=ax)

# PURPOSE
ax = plt.subplot()
for purpose in df_cox.PURPOSE.unique():
    is_pur = (df_cox.PURPOSE == purpose)
    naf.fit(T[is_pur], event_observed=E[is_pur], label=purpose)
示例#34
0
文件: kmna.py 项目: xcodevn/SADP
bins0 = config.BIN0
bins1 = config.BIN1

df = pd.read_stata("wichert.dta")
# (normalised time, event flag) pairs. Materialised as a list because zip()
# is a one-shot iterator on Python 3: it is iterated by the filter below and
# then measured with len(), which would raise TypeError on a bare zip object.
data_ = list(zip(df.time/max(df.time), df.event.astype(int)))
# Keep only observations whose normalised time is at least config.GAMMA.
data  = [(a, b) for (a,b) in data_ if a >= config.GAMMA]

print("[*] Remove #%d outliers" % (len(data_) - len(data)))
N  = len(df) # number of data points

#kmf = KaplanMeierFitter()
(T, E) = zip(*data)
#kmf.fit(T, event_observed=E)
naf = NelsonAalenFitter()
naf.fit(T, event_observed=E)
#ax = pyplot.subplot(121)
#naf.plot(ax=ax)

#ax = pyplot.subplot(122)
#kmf.plot(ax=ax)

# Reference cumulative hazard from the Nelson-Aalen fit.
true_value =  naf.cumulative_hazard_.values
#naf.cumulative_hazard_.to_csv("naf.csv")

#pyplot.show()

# Split the normalised times by event flag (0 / 1).
data0  = [ a for (a,b) in data if b == 0 ]
data1  = [ a for (a,b) in data if b == 1 ]

his0,bin_edges0 = np.histogram(data0, bins=bins0, range=(config.GAMMA, 1))
示例#35
0
2  13  1  miR-137
3  13  1  miR-137
4  19  1  miR-137
"""

# Durations and event indicators taken from the 'T' and 'E' columns.
T = df['T']
E = df['E']

# Fit the survival curve
kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=E)  # or, more succinctly, kmf.fit(T, E)
kmf.plot()

# Plot cumulative hazard function
naf = NelsonAalenFitter()
naf.fit(T, E)
naf.plot()

#------------------------------------------------------------------------------
#        Multiple groups
#------------------------------------------------------------------------------
# Boolean mask: True for rows whose group is 'miR-137'; every other row is
# treated as the control group.
groups = df['group']
ix = (groups == 'miR-137')

# Fit and draw the control curve first and keep its axes ...
kmf.fit(T[~ix], E[~ix], label='control')
ax = kmf.plot()

# ... then re-fit the same fitter on the 'miR-137' rows and draw onto the
# same axes so both curves share one figure.
kmf.fit(T[ix], E[ix], label='miR-137')
kmf.plot(ax=ax)

plt.show()
示例#36
0
class Node:
    """
    A node of a survival tree.

    Constructing a node immediately grows the subtree below it (see
    ``grow_tree``). A node is either terminal, in which case it holds a fitted
    Nelson-Aalen estimator in ``chf``, or internal, in which case it holds a
    split (``split_var`` / ``split_val``) and two children (``lhs`` / ``rhs``).
    """

    # Class-level defaults; instances overwrite them while the tree is grown.
    score = 0
    split_val = None
    split_var = None
    lhs = None
    rhs = None
    chf = None
    chf_terminal = None
    terminal = False

    def __init__(self,
                 x,
                 y,
                 tree,
                 f_idxs,
                 n_features,
                 unique_deaths=1,
                 min_leaf=1,
                 random_state=None):
        """
        A Node of the Survival Tree.
        :param x: The input samples. Should be a Dataframe with the shape [n_samples, n_features].
        :param y: The target values as a Dataframe with the survival time in the first column and the event
        in the second column.
        :param tree: The corresponding Survival Tree
        :param f_idxs: The indices of the features to use.
        :param n_features: The number of features to use.
        :param unique_deaths: The minimum number of unique deaths required to be at a leaf node.
        :param min_leaf: The minimum number of samples required to be at a leaf node. A split point at any depth will
        only be considered if it leaves at least min_leaf training samples in each of the left and right branches.
        :param random_state: Seed used when drawing the feature indices of the child nodes. If None, numpy's
        global random state is used.
        """
        self.x = x
        self.y = y
        self.tree = tree
        self.f_idxs = f_idxs
        self.n_features = n_features
        self.unique_deaths = unique_deaths
        self.random_state = random_state
        self.min_leaf = min_leaf
        self.grow_tree()

    def grow_tree(self):
        """
        Grow tree by calculating the Nodes recursively.
        :return: self
        """
        # Event column with its index turned into a regular column, duplicate
        # (index, event) rows dropped, then summed column-wise. Position 1 of
        # the result is the total of the event column. It is read with .iloc
        # because plain [1] on a label-indexed Series relies on a deprecated
        # positional fallback.
        event_rows = self.y.iloc[:, 1].reset_index().drop_duplicates()
        unique_deaths = event_rows.sum().iloc[1]

        if unique_deaths <= self.unique_deaths:
            self.compute_terminal_node()
            return self

        (self.score, self.split_val, self.split_var,
         lhs_idxs_opt, rhs_idxs_opt) = splitting.find_split(self)

        # No split variable was selected: this node becomes a leaf.
        if self.split_var is None:
            self.compute_terminal_node()
            return self

        # Draw the feature subsets for the two children. With a fixed seed a
        # fresh RandomState is built for each draw, so both children receive
        # the same permutation.
        if self.random_state is None:
            lf_idxs = np.random.permutation(self.x.shape[1])[:self.n_features]
            rf_idxs = np.random.permutation(self.x.shape[1])[:self.n_features]
        else:
            lf_idxs = np.random.RandomState(
                seed=self.random_state).permutation(
                    self.x.shape[1])[:self.n_features]
            rf_idxs = np.random.RandomState(
                seed=self.random_state).permutation(
                    self.x.shape[1])[:self.n_features]

        # unique_deaths is forwarded explicitly; otherwise the children would
        # silently fall back to the default threshold of 1.
        self.lhs = Node(self.x.iloc[lhs_idxs_opt, :],
                        self.y.iloc[lhs_idxs_opt, :],
                        self.tree,
                        lf_idxs,
                        self.n_features,
                        unique_deaths=self.unique_deaths,
                        min_leaf=self.min_leaf,
                        random_state=self.random_state)

        self.rhs = Node(self.x.iloc[rhs_idxs_opt, :],
                        self.y.iloc[rhs_idxs_opt, :],
                        self.tree,
                        rf_idxs,
                        self.n_features,
                        unique_deaths=self.unique_deaths,
                        min_leaf=self.min_leaf,
                        random_state=self.random_state)

        return self

    def compute_terminal_node(self):
        """
        Compute the terminal node if condition has reached.

        Marks the node as terminal and fits a Nelson-Aalen estimator on this
        node's samples, evaluated on the tree's timeline.
        :return: self
        """
        self.terminal = True
        self.chf = NelsonAalenFitter()
        t = self.y.iloc[:, 0]
        e = self.y.iloc[:, 1]
        self.chf.fit(t, event_observed=e, timeline=self.tree.timeline)

        return self

    def predict(self, x):
        """
        Predict the cumulative hazard function if its a terminal node. If not walk through the tree.

        The prediction is also stored on ``self.tree.chf``.
        :param x: The input sample, indexable by ``split_var``.
        :return: First column of the cumulative hazard table of the terminal node reached by ``x``.
        """
        if self.terminal:
            self.tree.chf = self.chf.cumulative_hazard_.iloc[:, 0]
            return self.tree.chf

        # Internal node: descend into the matching child and hand its result
        # back to the caller instead of dropping it.
        if x[self.split_var] <= self.split_val:
            return self.lhs.predict(x)
        return self.rhs.predict(x)
示例#37
0
import numpy as np
import pandas as pd

from lifelines import NelsonAalenFitter

# Load the data set from an Excel workbook in the working directory.
path = './totalData.xlsx'
data = pd.read_excel(path)

# Durations come from the 'totaltime' column.
duration = data.totaltime

# The 'failure' column is passed as the second fit argument below (the event
# indicator in lifelines).
indicator = data.failure

# Fit the Nelson-Aalen cumulative hazard estimate and plot it.
naf = NelsonAalenFitter()

naf.fit(duration, indicator)

naf.plot()