Пример #1
0
    def KM_estimate(self):
        """Fit a Kaplan-Meier estimator and cache its summaries on ``self.KM``.

        Reads ``self.x`` (durations), ``self.δ`` (event indicators),
        ``self.T`` (evaluation timeline), ``self.M`` and
        ``self.confidence_α``. Stores the fitted model and derived statistics
        on ``self.KM`` and sets ``self.current`` to ``'KM'``.
        """
        kmf = KaplanMeierFitter()
        T = self.T
        # ``np.bool`` was only an alias of the builtin and was removed in
        # NumPy 1.24; the builtin ``bool`` gives the same dtype on every version.
        kmf.fit(self.x,
                self.δ.astype(bool),
                alpha=self.confidence_α,
                timeline=T)
        Survival = np.array(kmf.predict(T))

        self.KM = S_fun(self.x, self.M, T, Survival)
        self.KM.kmf = kmf
        self.KM.Sfun = self.KM.kmf.predict
        # Left-endpoint sum of the survival values over the timeline steps,
        # shifted by the first timeline point.
        self.KM.mean = np.sum([(T[nn + 1] - T[nn]) * Survival[nn]
                               for nn in range(len(Survival) - 1)
                               ]).astype(float) + T[0]

        # Standard deviation of the fitted survival-function column; the same
        # value is reused as the uncertainty on the mean.
        self.KM.σ = np.array(
            self.KM.kmf.survival_function_.std())[0].astype(float)
        self.KM.mean_σ = self.KM.σ
        self.KM.CI = np.array(self.KM.kmf.confidence_interval_)
        # NOTE(review): ``σ_interval`` is not defined in this method; it is
        # presumably a module-level sequence of percentages - confirm.
        percents = np.array(
            [self.percentile(self.KM.Sfun, T, q / 100.) for q in σ_interval])
        self.KM.median = self.KM.kmf.median_
        # Half the distance between the second and third percentile values.
        self.KM.median_σ = 0.5 * np.diff(percents[1:])[0]
        self.current = 'KM'
    def plot(self):
        """
        Plot side-by-side Kaplan-Meier curves of the input datasets.

        Draws one panel for ``self.stats_original_`` and one for
        ``self.stats_synthetic_`` (titled with the matching entry of
        ``self.labels``), with one survival curve per distinct value of the
        ``'group'`` column. Returns nothing; the figure is left as the
        current matplotlib figure.
        """
        from itertools import cycle

        figsize = (10, 5)
        _, axes = plt.subplots(1, 2, figsize=figsize, sharey=True)

        palette = ['#0d3d56', '#006887', '#0098b5', '#00cbde', '#00ffff']

        datasets = [self.stats_original_, self.stats_synthetic_]
        for data, label, ax_cur in zip(datasets, self.labels, axes):
            t = data['time']
            e = data['event']

            kmf = KaplanMeierFitter()
            groups = np.sort(data['group'].unique())
            # Cycle the palette: a plain zip() against the five colours would
            # silently drop every group after the fifth.
            for g, color in zip(groups, cycle(palette)):
                mask = (data['group'] == g)
                kmf.fit(t[mask], event_observed=e[mask], label=g)
                ax_cur = kmf.plot_survival_function(ax=ax_cur, color=color)

            # Per-axes styling only needs to happen once, after all curves
            # exist (the legend then lists every group).
            if len(groups):
                ax_cur.legend(title=self.group_column)
            ax_cur.set_title('Kaplan-Meier - {} data'.format(label))
            ax_cur.set_ylim(0, 1)
        plt.tight_layout()
Пример #3
0
def plot_survival_curves(df, time, event, by=None):
    '''
    Creates survival curves grouped by the given categorical variable.

    df    : DataFrame holding the survival data (not modified).
    time  : name of the duration column.
    event : name of the event-indicator column.
    by    : optional name of a grouping column; when falsy a single curve is
            drawn and the legend is removed.

    ex) df.pipe(plot_survival_curves, time='days', event='cancel', by='state')
    '''

    fig, ax = plt.subplots()
    kmf = KaplanMeierFitter()

    if by:
        # Group labels are compared as strings. The mask must be built from
        # the same string-cast column: comparing the raw column against the
        # string label matches nothing when the column is numeric.
        has_group = df[by].notna()
        group_labels = df[by].astype(str)

        for group in group_labels[has_group].unique():
            members = has_group & (group_labels == group)
            T = df.loc[members, time]
            E = df.loc[members, event]
            kmf.fit(T, event_observed=E, label=group)
            kmf.survival_function_.plot(ax=ax)

        plt.legend(title=by, loc=(1, 0))

    else:
        T = df[time]
        E = df[event]
        kmf.fit(T, event_observed=E)
        kmf.survival_function_.plot(ax=ax)
        plt.legend().remove()
Пример #4
0
def subsetsImpactSurvival(subsets,
                          metadata,
                          metacensorcol="overall_survival",
                          metaDFDcol="death_from_disease",
                          plot=False,
                          title=None,
                          rounding=2):
    """
    Fit and overlay one Kaplan-Meier curve per subset of ``metadata``.

    subsets is a dictionary mapping a label to a row selector for metadata,
    e.g.: subsets={'cluster {}'.format(i):metadata.index.isin(fitrue.columns[kmeans.labels_==i]) for i in range(4)}

    ``metacensorcol`` names the duration column and ``metaDFDcol`` the
    event column. ``plot`` and ``rounding`` are accepted for compatibility
    but are not used.

    Returns ``(lastvalues, ax)`` where ``lastvalues[label]`` is
    ``(sum(selector), last survival estimate)`` and ``ax`` is the shared axes.

    Raises ValueError when ``subsets`` is empty (there is nothing to plot).
    """
    kmf = KaplanMeierFitter()

    ax = None
    lastvalues = {}
    for subset, members in subsets.items():
        kmf.fit(metadata[metacensorcol][members],
                metadata[metaDFDcol][members],
                label=subset)
        survival = kmf.survival_function_
        # The selected row is a one-element Series; take the scalar
        # explicitly rather than calling float() on the Series.
        last_row = survival.loc[survival.last_valid_index()]
        lastvalues[subset] = (sum(members), float(last_row.iloc[0]))
        # The first curve creates the axes, later curves are drawn onto it.
        ax = kmf.plot() if ax is None else kmf.plot(ax=ax)

    if ax is None:
        raise ValueError("subsets must contain at least one subset")

    if title:
        ax.set_title(title)
    ax.set_ylim((0, 1))
    return lastvalues, ax
Пример #5
0
    def marginal(self):
        """Compute marginal weights from a reverse Kaplan-Meier fit.

        Flips ``self.data['status']`` in place, optionally fits a
        Kaplan-Meier estimator on the flipped indicator, and returns
        ``self.weights``.

        NOTE(review): ``self.weights`` is only assigned here when
        ``"IPCW.times"`` is in ``self.what``; otherwise the returned value is
        whatever was set elsewhere (or an AttributeError) - confirm intended.
        """
        # Reverse Kaplan-Meier: swap the 0/1 status indicator.
        # NOTE(review): this overwrites self.data['status'], so every call
        # flips the column again - confirm callers invoke this only once.
        self.data['status'] = self.data['status'].values.astype(int) ^ 1

        #  weights at requested times
        if "IPCW.times" in self.what:
            kmf = KaplanMeierFitter()
            kmf.fit(self.data['failure_time'],
                    event_observed=self.data['status'].values,
                    timeline=self.times)
            # Fitted estimate at the requested times, rounded to 4 decimals.
            self.weights = np.round(kmf.predict(self.times), decimals=4)
            # Alternatives left over from the original port:
            #self.weights = kmf.conditional_time_to_event_(self.times)
            # self.times = predict(fit, newdata=data, times=times, level_chaos=1, mode="matrix", type="surv")
            # The requested times are replaced by an empty placeholder here.
            self.times = []
        else:
            self.times = None

        # weights at subject specific event times (placeholder only)
        if "IPCW.subject.times" in self.what:
            # self.subject_times = prodlim.predictSurvIndividual(fit, lag=self.lag)
            self.subject_times = []
        else:
            self.subject_times = None

        out = {
            'times': self.times,
            'subject_times': self.subject_times,
            'method': self.method
        }
        # The result of self.output() is bound to ``out`` but not returned.
        out = self.output(out, self.keep, self.times, self.fit, self.call)

        # class(out) < - "IPCW"
        return self.weights
Пример #6
0
	def __KM_analysis(self,duration_table,expressed_array,unexpressed_array,freq_set):
		"""Log-rank compare the two groups and plot both KM curves when p < .0006.

		Every row of ``duration_table`` after the first is read as
		``(identifier, duration, event)``. Rows whose identifier is in
		``unexpressed_array`` (checked first) or ``expressed_array`` are
		collected unless either value is the string "NA". ``freq_set`` is
		only used in the plot title. Returns the log-rank p-value.
		"""
		def has_values(record):
			# Both the duration and the event field must be present.
			return record[1] != "NA" and record[2] != "NA"

		hit_times, hit_events = [], []
		miss_times, miss_events = [], []
		for position, record in enumerate(duration_table):
			if position == 0:
				# The first row is skipped.
				continue
			if record[0] in unexpressed_array and has_values(record):
				miss_times.append(float(record[1]))
				miss_events.append(int(record[2]))
			elif record[0] in expressed_array and has_values(record):
				hit_times.append(float(record[1]))
				hit_events.append(int(record[2]))

		outcome = logrank_test(hit_times, miss_times, hit_events, miss_events, alpha=.95 )
		p_value = outcome.p_value
		if not (p_value < .0006):
			return p_value

		axis = plt.subplot(111)
		fitter = KaplanMeierFitter()
		fitter.fit(hit_times, event_observed=hit_events, label="Satisfying")
		fitter.plot(ax=axis, ci_force_lines=False)
		fitter.fit(miss_times, event_observed=miss_events, label="None-Satisfying")
		fitter.plot(ax=axis, ci_force_lines=False)
		plt.ylim(0,1)
		plt.title("Lifespans ("+str(freq_set)+")")
		plt.show()
		return p_value
Пример #7
0
def plot_two_groups(data, t_col_name, e_col_name, g_name, alpha):
    '''
    Render the Kaplan-Meier curves of two groups and show the log-rank p-value.

    data       : DataFrame holding the survival data.
    t_col_name : name of the duration column.
    e_col_name : name of the event-indicator column.
    g_name     : name of the grouping column; the first distinct value forms
                 one group and all remaining rows form the other.
    alpha      : forwarded to ``logrank_test`` and echoed in the plot title.

    Raises ValueError when the grouping column has fewer than two distinct
    values.
    '''
    T = data[t_col_name]
    E = data[e_col_name]

    # Read the groups from the ``data`` argument; the previous code used an
    # undefined/global ``df`` here.
    groups = data[g_name]

    # get unique groups to get 1st and 2nd groups names
    uniques = groups.unique()
    if len(uniques) < 2:
        raise ValueError(
            "column {!r} must contain at least two groups".format(g_name))

    ix = (groups == uniques[0])

    kmf = KaplanMeierFitter()
    # plot the rows outside the first group, labelled with the second name
    kmf.fit(T[~ix], E[~ix], label=uniques[1])
    ax = kmf.plot()

    # plot the first group on the same axes
    kmf.fit(T[ix], E[ix], label=uniques[0])
    kmf.plot(ax=ax)
    # log-rank test between the two groups for the p-value
    results = logrank_test(T[ix], T[~ix], E[ix], E[~ix], alpha=alpha)
    plt.title('p-value: {0:.4f}, alpha: {1:.2f}'.format(
        results.p_value, alpha))
Пример #8
0
Файл: km.py Проект: xcodevn/SADP
def fun(epsilon):
    """Print the average relative error of KM estimates from noisy histograms.

    Repeats 100 times: perturb the module-level histogram ``his`` with
    ``laplace_mechanism`` (scale ``sqrt(2) / epsilon``), rebuild synthetic
    durations/events from the noisy bin counts, fit a Kaplan-Meier estimator
    and compare its prediction on ``kmf.timeline`` with ``true_value[:, 0]``.
    The mean relative error is printed as ``(epsilon, error)``.

    NOTE(review): relies on module-level ``his``, ``bins0``, ``bins1``,
    ``bin_edges0``, ``bin_edges1``, ``kmf`` and ``true_value``.
    """
    li = []
    for kk in range(100):
        newdata_ = laplace_mechanism(his, np.sqrt(2.0) / epsilon)

        # Clamp negative noisy counts to zero and truncate to int: current
        # NumPy rejects float sample counts in linspace/zeros/ones.
        newdata = [int(max(0.0, d)) for d in newdata_]

        # Collect the pieces and concatenate once instead of re-allocating
        # the arrays with np.append on every bin.
        time_parts = []
        event_parts = []
        for i in range(bins0):
            # first histogram: events flagged 0
            time_parts.append(
                np.linspace(bin_edges0[i], bin_edges0[i + 1], newdata[i]))
            event_parts.append(np.zeros(newdata[i]))

        for i in range(bins1):
            # second histogram: events flagged 1
            time_parts.append(
                np.linspace(bin_edges1[i], bin_edges1[i + 1],
                            newdata[bins0 + i]))
            event_parts.append(np.ones(newdata[bins0 + i]))

        ntime = np.concatenate(time_parts) if time_parts else np.asarray([])
        nevent = np.concatenate(event_parts) if event_parts else np.asarray([])

        kmf1 = KaplanMeierFitter()
        kmf1.fit(ntime, event_observed=nevent)
        out = kmf1.predict(kmf.timeline)

        mre = (np.linalg.norm(out - true_value[:, 0])
               / np.linalg.norm(true_value[:, 0]))
        li.append(mre)
    avg = np.average(li)
    # Parenthesised form prints identically on Python 2 and Python 3.
    print("(%f, %f)" % (epsilon, avg))
Пример #9
0
def survival_analysis(dataframe, grouping, years = 5):
	"""Plot Kaplan-Meier curves per group, censoring follow-up after ``years``.

	dataframe : DataFrame with ``_OS`` (duration) and ``_EVENT`` columns;
	            it is not modified.
	grouping  : column to group by; rows with a missing value in it, in
	            ``_OS`` or in ``_EVENT`` are dropped.
	years     : analysis window; durations beyond ``years * 365`` are capped
	            at that value and their event is set to 0.
	"""
	# remove patients with null values
	df2 = dataframe.dropna(subset = [grouping, '_OS', '_EVENT'])

	# limit analysis to number of years specified
	# Whole-column assignment replaces the former chained indexing
	# (df2['col'][mask] = ...), which pandas does not guarantee to write back.
	maxtime = years * 365
	within_window = df2['_OS'] <= maxtime
	df2['survival'] = df2['_OS'].where(within_window, maxtime)
	df2['event'] = df2['_EVENT'].where(within_window, 0)

	# plot one survival curve per group; groupby yields the groups in
	# sorted key order
	kmf = KaplanMeierFitter()
	ax = plt.subplot(111)
	for group, data in df2.groupby(grouping):
		kmf.fit(data['survival'], data['event'], label = group)
		kmf.plot(ax=ax, show_censors = True)

	plt.show()
Пример #10
0
def plotKM(genes):
    """Save a Kaplan-Meier figure for each of the first 14 entries of ``genes``.

    Loads "data/survival_complete.txt" (after calling
    ``extractSurvivalData()``), and for every index splits the records with
    ``separateLabels`` on the 'ER' column and then with
    ``separateHighandLow``. The two resulting groups are plotted together
    and written to ``images/kmplot_<gene>``.
    """
    extractSurvivalData()
    raw = np.genfromtxt("data/survival_complete.txt", delimiter='\t', dtype=str)

    column_names = ['id', 'ER', 'PR', 'HER2', 'TN', 'GCH1', 'CDH1', 'CDH2',
                    'VIM', 'bCatenin', 'ZEB1', 'ZEB2', 'TWIST1', 'SNAI1',
                    'SNAI2', 'RET', 'NGFR', 'EGFR', 'AXL', 'STATUS', 'MONTHS']
    df = pd.DataFrame(raw, columns=column_names)

    fitter = KaplanMeierFitter()

    for gene_idx in range(14):
        # Split into marker-positive / marker-negative records; only the
        # positive part is used below.
        positive, _negative = separateLabels(df, 'ER', gene_idx, 1)

        # Within the positive records, split into high / low expression.
        high_rows, low_rows = separateHighandLow(df, genes, gene_idx,
                                                 positive.values)

        # Column 2 of each split is fitted as the durations.
        fitter.fit(high_rows[:, 2:3].astype(float), label='pos_high')
        axis = fitter.plot()
        fitter.fit(low_rows[:, 2:3].astype(float), label='pos_low')
        fitter.plot(ax=axis)

        plt.savefig("images/kmplot_" + genes[gene_idx])
        plt.clf()
Пример #11
0
def plot_km_survf(data, t_col="t", e_col="e", save_file=''):
    """
    Plot KM survival function curves and save the figure as a PDF.
    Parameters
    ----------
    data: pandas.DataFrame
        Survival data to plot.
    t_col: str
        Column name in data indicating time.
    e_col: str
        Column name in data indicating events or status.
    save_file: string
        Output path without extension; ``'.pdf'`` is appended.
    """
    from lifelines import KaplanMeierFitter
    from lifelines.plotting import add_at_risk_counts

    # Create exactly one figure and save that same figure. The previous
    # code opened an extra empty figure and wrote the empty one to disk.
    fig, ax = plt.subplots(figsize=(6, 4))
    kmfh = KaplanMeierFitter()
    kmfh.fit(data[t_col],
             event_observed=data[e_col],
             label="KM Survival Curve")
    kmfh.survival_function_.plot(ax=ax)
    plt.ylim(0, 1.01)
    plt.xlabel("Time")
    plt.ylabel("Probalities")
    plt.legend(loc="best")
    add_at_risk_counts(kmfh, ax=ax)
    fig.savefig(save_file + '.pdf', bbox_inches='tight')
def plot_survival_function(scdf):
    """Plot and save the KM survival function of ``frameZeroUtr3LenAdj + 3``.

    A row counts as an observed event when the shifted length is below 101;
    all other rows are treated as censored. The figure is written to
    ``<rootDir>/figures/Fig3S2B.pdf`` (``rootDir`` is a module-level name).
    ``scdf`` itself is not modified.
    """
    dfl = scdf.copy()
    dfl['sc3'] = dfl['frameZeroUtr3LenAdj'] + 3

    # Vectorised event flag (1.0 / 0.0). The former per-row .loc loop was
    # slow and failed outright when the index contained duplicates.
    dfl['kill'] = (dfl['sc3'] < 101).astype(float)

    kfm = KaplanMeierFitter()
    T = dfl['sc3']
    E = dfl['kill']

    kfm.fit(T, event_observed=E)
    kfm.survival_function_.plot()

    ax = plt.gca()

    ax.set_ylim(0, 1)
    ax.set_xlim(0, 100)

    figout = "%s/figures/Fig3S2B.pdf" % rootDir
    plt.savefig(figout, format='pdf', bbox_inches="tight")
class KaplanMeier:
    """Classifier-style wrapper around a left-censored ``KaplanMeierFitter``.

    Probabilities are read from the fitted cumulative density, and a
    probability of at least 0.5 is labelled 1.0.
    """

    def __init__(self):
        self.kmf = KaplanMeierFitter()

    @staticmethod
    def _to_labels(probabilities):
        # Threshold probabilities at 0.5 into 1.0 / 0.0 labels.
        return np.where(probabilities >= 0.5, 1.0, 0.0)

    def fit(self, X, y):
        """Fit on durations ``X`` and event flags ``y``; returns ``self``."""
        self.kmf.fit(durations=X, event_observed=y, left_censorship=True)
        print("cumulative_density_:")
        print(self.kmf.cumulative_density_)
        return self

    def predict_proba(self, X):
        """Look up the fitted cumulative density at the times in ``X``.

        The squeezed values of ``X`` are used as index labels, so each time
        must be present in the fitted table.
        """
        times = np.squeeze(X)
        density = self.kmf.cumulative_density_
        return density.loc[times, 'KM_estimate']

    def predict(self, X):
        """Return 1.0 where the looked-up probability is >= 0.5, else 0.0."""
        return self._to_labels(self.predict_proba(X))

    def evaluate(self, X, y_bin_true, sample_weights=None):
        """Return ``(log loss, c-index, accuracy)`` for the times in ``X``."""
        y_proba_pred = self.predict_proba(X)
        y_bin_pred = self._to_labels(y_proba_pred)

        loss = log_loss(y_bin_true, y_proba_pred, sample_weight=sample_weights)
        concordance = c_index(y_bin_true, y_proba_pred, np.squeeze(X))
        accuracy = accuracy_score(y_bin_true, y_bin_pred,
                                  sample_weight=sample_weights)
        return loss, concordance, accuracy
Пример #14
0
def plot_survival_curves(rec_t, rec_e, antirec_t, antirec_e, experiment_name = '', output_file = None):
    """Plot recommendation vs. anti-recommendation KM curves with a log-rank p-value.

    rec_t / rec_e         : durations and event flags of the recommendation group.
    antirec_t / antirec_e : durations and event flags of the other group.
    experiment_name       : prefix for both legend labels.
    output_file           : when truthy, the figure is saved to this path.
    """
    # Set-up plots
    plt.figure(figsize=(12,3))
    ax = plt.subplot(111)

    # Fit survival curves (solid = recommendation, dashed = anti-recommendation)
    kmf = KaplanMeierFitter()
    kmf.fit(rec_t, event_observed=rec_e, label=' '.join([experiment_name, "Recommendation"]))
    kmf.plot(ax=ax,linestyle="-")
    kmf.fit(antirec_t, event_observed=antirec_e, label=' '.join([experiment_name, "Anti-Recommendation"]))
    kmf.plot(ax=ax,linestyle="--")

    # Format graph
    plt.ylim(0,1)
    ax.set_xlabel('Timeline (months)',fontsize='large')
    ax.set_ylabel('Percentage of Population Alive',fontsize='large')

    # Calculate p-value
    results = logrank_test(rec_t, antirec_t, rec_e, antirec_e, alpha=.95)
    results.print_summary()

    # Place the p-value text at one ninth of the longest duration
    xloc = max(np.max(rec_t),np.max(antirec_t)) / 9
    if results.p_value < 1e-5:
        # Raw string: "\m" is an invalid escape in a normal literal. The
        # resulting text is unchanged.
        ax.text(xloc,.2,r'$p < 1\mathrm{e}{-5}$',fontsize=20)
    else:
        ax.text(xloc,.2,'$p=%f$' % results.p_value,fontsize=20)
    plt.legend(loc='best',prop={'size':15})

    if output_file:
        plt.tight_layout()
        pylab.savefig(output_file)
Пример #15
0
def dust_mass_KM():
    """Fit and save a Kaplan-Meier plot of the log10 VT19 dust masses.

    Reads three tables from hard-coded paths. Only the 'Mass' column of the
    VT19 table feeds the fit; the other columns are loaded and transformed
    but not used further. The plot is written to
    '/home/jotter/nrao/plots/VT19_KM_plot.png'.
    """
    vt19_table = Table.read('/home/jotter/nrao/tables/VT19.txt', format='latex')
    eisner_table = Table.read('/home/jotter/nrao/tables/eisner_tbl.txt',
                              format='ascii')
    calc_table = Table.read(
        '/home/jotter/nrao/summer_research_2018/tables/r0.5_apr20_calc_vals.fits'
    )

    raw_vt19_mass = vt19_table['Mass']
    raw_eisner_mass = eisner_table['M_dust^a']
    log_mass_b3 = np.log10(calc_table['dust_mass_B3'])
    log_mass_b7 = np.log10(calc_table['dust_mass_B7'])
    # Keep only the B7 entries that are not NaN.
    valid_b7 = np.where(np.logical_not(np.isnan(log_mass_b7)))[0]
    log_mass_b7 = log_mass_b7[valid_b7]

    # Each entry contributes its first whitespace-separated token with the
    # leading character removed.
    mass_tokens = [entry.split()[0][1:] for entry in raw_vt19_mass]

    # The first token is skipped before converting to float.
    vt19_dmass = np.log10(np.array(mass_tokens[1:], dtype='float'))

    fitter = KaplanMeierFitter()
    fitter.fit(vt19_dmass)

    fig = plt.figure()
    ax = fitter.plot()
    plt.savefig('/home/jotter/nrao/plots/VT19_KM_plot.png')
Пример #16
0
def kaplan_meier(out, t, ttype):
    """Return a KaplanMeierFitter fitted on durations ``t`` and events ``out``.

    The curve label is "Rand<ttype>; <number of observations> obs.", where
    the count is ``len(out)``.
    """
    curve_label = "Rand%d; %d obs." % (ttype, len(out))
    fitter = KaplanMeierFitter()
    fitter.fit(t, event_observed=out, label=curve_label)
    return fitter
Пример #17
0
def main():
    """Draw Kaplan-Meier curves for low/high risk groups from bootstrap predictions.

    Loads a pickled list (outcomes, per-bootstrap predictions, in-bag
    indices) from ``args.file_name`` inside the data directory, averages the
    predictions over the bootstraps in which a row was not in-bag, splits
    the rows at the median of that average, and plots one curve per group.
    """
    args = parse_args()
    data_dir = DATA_DIR if args.data_dir is None else Path(args.data_dir)
    with open(str(data_dir.joinpath(args.file_name)), 'rb') as handle:
        payload = pickle.load(handle)
    y_orig = payload[0]
    preds_bootfull = payload[1]
    inds_inbag = payload[2]
    del payload

    preds_bootfull_mat = np.concatenate(preds_bootfull, axis=1)
    inds_inbag_mat = np.array(inds_inbag).T
    n_rows = inds_inbag_mat.shape[0]
    # inbag_mask[r, b] is 1 when value r occurs in column b of the in-bag
    # index matrix.
    inbag_mask = 1 * np.array(
        [np.any(inds_inbag_mat == row, axis=0) for row in range(n_rows)])
    oob_mask = 1 - inbag_mask
    # Mean prediction over the columns where the row was not in-bag.
    preds_bootave_oob = (np.sum(oob_mask * preds_bootfull_mat, axis=1)
                         / np.sum(oob_mask, axis=1))
    risk_groups = 1 * (preds_bootave_oob > np.median(preds_bootave_oob))

    table = np.concatenate(
        (y_orig, preds_bootave_oob[:, np.newaxis], risk_groups[:, np.newaxis]),
        axis=-1)
    # The frame is indexed by the risk group as a string so that each group
    # can be selected with .loc below.
    wdf = pd.DataFrame(
        table,
        columns=['status', 'time', 'preds', 'risk_groups'],
        index=[str(group) for group in risk_groups]
    )

    kmf = KaplanMeierFitter()
    ax = plt.subplot(111)
    for group_key, group_label in (('0', "Low Risk"), ('1', "High Risk")):
        kmf.fit(durations=wdf.loc[group_key, 'time'],
                event_observed=wdf.loc[group_key, 'status'],
                label=group_label)
        ax = kmf.plot(ax=ax)
    plt.ylim(0,1)
    plt.title("Kaplan-Meier Plots")
    plt.xlabel('Time (days)')
    plt.ylabel('Survival Probability')
Пример #18
0
def kmplot(df_high, df_low, ax):
    """Plot high/low group KM curves on ``ax`` and report the log-rank result.

    Both frames must expose ``duration`` and ``event`` attributes.

    Returns ``(p_value, hm5, hm10, lm5, lm10)`` where the last four are the
    fitted estimates of the high and low groups at 60 and 120. If fitting
    raises ValueError, returns ``("NA", "0", "0", "0", "0")``.
    """
    fit_high = KaplanMeierFitter()
    fit_low = KaplanMeierFitter()
    try:
        fit_high.fit(durations=df_high.duration,
                     event_observed=df_high.event,
                     label='High: n = {}'.format(len(df_high)))
        fit_low.fit(durations=df_low.duration,
                    event_observed=df_low.event,
                    label="Low: n = {}".format(len(df_low)))
    except ValueError:
        return ("NA", "0", "0", "0", "0")

    for fitter, colour in ((fit_high, "red"), (fit_low, "black")):
        fitter.plot(ax=ax, color=colour, show_censors=True, ci_show=False)

    p_value = logrank_test(df_high.duration,
                           df_low.duration,
                           event_observed_A=df_high.event,
                           event_observed_B=df_low.event).p_value

    ax.set_xlabel('Time (months)')
    ax.set_ylabel('Probability')
    # Annotation in the lower-right corner, in axes coordinates.
    ax.text(0.95,
            0.02,
            'logrank P = %.4f' % p_value,
            verticalalignment='bottom',
            horizontalalignment='right',
            transform=ax.transAxes,
            color='black',
            fontsize=11)
    plt.legend(loc=3)

    hm5, hm10 = fit_high.predict(60), fit_high.predict(120)
    lm5, lm10 = fit_low.predict(60), fit_low.predict(120)
    return (p_value, hm5, hm10, lm5, lm10)
Пример #19
0
    def plot_kaplan_meier(self, column, value):
        """Plot Kaplan-Meier survival curves for one attribute value vs. the rest.

        Args:
            column (str): column of ``self.data`` to split on, such as a
                receptor status.
            value (str or int): value of ``column`` defining the first group;
                every row whose value differs forms the second group.

        Durations come from ``overall_survival_months`` and events from
        ``death_from_cancer``. The figure is shown with ``plt.show()``.
        """
        is_match = self.data[self.data[column] == value]
        is_other = self.data[self.data[column] != value]
        match_months = is_match.overall_survival_months
        other_months = is_other.overall_survival_months

        fit_match = KaplanMeierFitter()
        fit_match.fit(match_months,
                      event_observed=is_match['death_from_cancer'],
                      label=value)
        axis = fit_match.plot()

        fit_other = KaplanMeierFitter()
        fit_other.fit(other_months,
                      event_observed=is_other['death_from_cancer'],
                      label=f'not {value}')
        axis = fit_other.plot(ax=axis)

        # At-risk table for both fitted curves below the plot.
        add_at_risk_counts(fit_match, fit_other, ax=axis)
        axis.set_ylim([0.0, 1.0])
        axis.set_xlabel('Timeline (Months)')
        axis.set_title(f'Kaplan Meier plot in months of {column} variable')
        plt.tight_layout()
        plt.show()
def survival_for_two(df, treat, ctrl, legends, title, figname):
    """Plot and save KM curves for the ``treat`` group versus all other rows.

    df      : DataFrame with 'group', 'time' and 'status' columns.
    treat   : value of 'group' selecting the first curve (red, solid).
    ctrl    : unused; every row not equal to ``treat`` forms the second
              curve (black, dashed).
    legends : two legend labels, in plotting order.
    title   : plot title.
    figname : output path passed to ``plt.savefig``.

    Returns the ``logrank_test`` result comparing the two groups.
    """
    # select the time and status info for treat and control group
    ix = df['group'] == treat
    t1 = df.loc[ix, 'time']
    print(t1.shape)
    e1 = df.loc[ix, 'status']
    t2 = df.loc[~ix, 'time']
    print(t2.shape)
    e2 = df.loc[~ix, 'status']

    results = logrank_test(t1, t2, event_observed_A=e1, event_observed_B=e2)
    pvalue = results.p_value
    print('pvalue:\t{}'.format(pvalue))

    # survival curves
    plt.figure(figsize=(3., 3.))
    ax = plt.subplot(111)

    censor_styles = {'ms': 12, 'marker': '+'}
    kmf_treat = KaplanMeierFitter()
    kmf_treat.fit(t1, e1).plot(ax=ax, show_censors=True,
                               censor_styles=censor_styles, ci_show=False,
                               c='red', ls='-')

    kmf_rest = KaplanMeierFitter()
    kmf_rest.fit(t2, e2).plot(ax=ax, show_censors=True,
                              censor_styles=censor_styles, ci_show=False,
                              c='k', ls='--')

    handles, labels = ax.get_legend_handles_labels()
    print(labels)
    # Every second handle is used for the legend.
    # NOTE(review): presumably this skips the censor-marker artists - confirm.
    ax.legend(handles[1::2],
              legends,
              loc='lower left',
              borderaxespad=-.15,
              handletextpad=.2,
              labelspacing=.3,
              handlelength=1,
              frameon=False)
    if pvalue < 1:
        # Annotate the axes that hold the curves. plt.axes() with no
        # arguments would add a new axes on top of the plot.
        ax.text(df['time'].max() * 0.45,
                0.45,
                'p={:.2f}'.format(pvalue),
                fontsize=16,
                ha='center')

    plt.ylim([-0.02, 1.05])
    plt.title(title, fontsize=22)
    plt.xlabel('Days', fontsize=22)
    plt.ylabel('Survival probability', fontsize=22)
    plt.savefig(figname,
                bbox_inches='tight',
                pad_inches=.1,
                dpi=600,
                transparent=True)
    plt.close()
    return results
def KM_estimator(relapsed_data, censored_data):
    """Fit, plot, save ('km.pdf') and show one Kaplan-Meier curve.

    ``relapsed_data`` holds durations with an observed event and
    ``censored_data`` durations without one; the two are joined with ``+``.
    """
    all_durations = relapsed_data + censored_data
    # Event flag 1 for every relapsed entry, 0 for every censored entry.
    observed_flags = (list(np.ones(len(relapsed_data)))
                      + list(np.zeros(len(censored_data))))
    ax = plt.subplot(111)
    fitter = KaplanMeierFitter()

    fitter.fit(all_durations, observed_flags, label='kaplan-meier curve')

    current_axes = plt.gca()
    current_axes.set_ylim([0, 1])
    current_axes.set_xlim([0, 86])
    current_axes.set_position([0.16, 0.175, 0.81, 0.8])

    fitter.plot(show_censors=False, censor_styles={'ms': 3, 'marker': 's'}, ci_show=True, at_risk_counts=False)
    plt.xlabel('Time in Months', labelpad=10, fontsize=20)
    plt.ylabel('Survival Probability', labelpad=10, fontsize=20)

    # Same tick-label size on both axes.
    for axis in (ax.xaxis, ax.yaxis):
        for tick in axis.get_major_ticks():
            tick.label1.set_fontsize(15)

    plt.savefig('km.pdf')
    plt.show()
Пример #22
0
def meetingTimeHelper(df):
    """
    Fit Kaplan-Meier estimators of the meeting time.

    Input:
        df: data frame read through the columns
            "tau": meeting time, with -1 marking rows where the value of
                   "nXstep" is used instead
            "nXstep": number of steps at the end, used when "tau" is -1
            "hasMet": event indicator for both fits
            "timeTaken": durations for the second fit
    Outputs:
        (tauFitted, timeFitted): the fit on the tau / nXstep durations and
        the fit on "timeTaken"
    """
    # Replace each -1 tau by the row's nXstep value (looked up by position).
    durations = [
        df["nXstep"].iloc[position] if tau == -1 else tau
        for position, tau in enumerate(df["tau"])
    ]

    tauFitted = KaplanMeierFitter().fit(durations, df["hasMet"])
    timeFitted = KaplanMeierFitter().fit(df["timeTaken"], df["hasMet"])
    return tauFitted, timeFitted
Пример #23
0
    def survival_plot_and_cox(self, df_arr, label=None, filename=''):
        """Overlay one Kaplan-Meier curve per data frame and save PNG and PDF.

        df_arr   : sequence of frames with 'bcrmonth' (duration) and
                   'bcrstatus' (event) columns.
        label    : optional sequence of curve labels; frames without a
                   matching entry use the fitter's default label.
        filename : output path without extension.

        Raises ValueError when ``df_arr`` is empty.
        """
        if len(df_arr) == 0:
            raise ValueError("df_arr must contain at least one data frame")
        # None instead of a shared mutable default list.
        labels = [] if label is None else label

        plt.clf()
        color = ['red', 'green', 'blue', 'cyan', 'orange', 'black']

        kmf = KaplanMeierFitter()

        ax = None
        for a, df_el in enumerate(df_arr):
            curve_label = labels[a] if a < len(labels) else None
            kmf.fit(df_el['bcrmonth'], df_el['bcrstatus'], label=curve_label)
            plot_kwargs = dict(show_censors=True,
                               ci_show=False,
                               # wrap around rather than fail past six curves
                               color=color[a % len(color)],
                               ylim=(0, 1))
            if ax is not None:
                # Later curves are drawn onto the first curve's axes.
                plot_kwargs['ax'] = ax
            ax = kmf.plot(**plot_kwargs)

        fig = ax.get_figure()
        fig.savefig(filename + '.png')
        fig.savefig(filename + '.pdf', format='PDF')
Пример #24
0
def KaplanMeier_dash(T, C):
    """Fit a KM curve on durations ``T`` / events ``C`` and pass the figure to ``pyplot``.

    The curve is plotted twice (the second time with ``ci_force_lines=True``)
    before the current figure is handed over with ``legend=False``.
    """
    heading = 'Kaplan Meier fitter'
    estimator = KaplanMeierFitter()
    estimator.fit(T, event_observed=C)
    estimator.plot(title=heading)
    estimator.plot(ci_force_lines=True, title=heading)
    current_figure = plt.gcf()
    pyplot(current_figure, legend=False)
Пример #25
0
def km_analysis(survivalDf, durationCol, statusCol, saveFile=None):
    """Attach the Kaplan-Meier survival estimate to every row of ``survivalDf``.

    survivalDf  : DataFrame with the duration and status columns.
    durationCol : name of the duration column.
    statusCol   : name of the event-indicator column.
    saveFile    : optional CSV path for the result.

    Returns a copy of ``survivalDf`` sorted by ``durationCol`` with an added
    ``kmEstimate`` column.
    """
    kmf = KaplanMeierFitter()
    kmf.fit(survivalDf.loc[:, durationCol], survivalDf.loc[:, statusCol])
    survFunc = kmf.survival_function_

    survivalDf = survivalDf.sort_values(by=durationCol)
    ttpfs = numpy.array(survivalDf.loc[:, durationCol])
    survTime = numpy.array(survFunc.index)
    # Set membership is O(1) per row, unlike scanning the index array.
    knownTimes = set(survTime.tolist())
    lastIx = len(survTime) - 1
    survProb = []

    for date in ttpfs:
        if date in knownTimes:
            survProb.append(survFunc.loc[date, "KM_estimate"])
        else:
            # Time missing from the fitted index: average the estimates of
            # the neighbouring rows (upper neighbour clamped to the last row).
            lbix = numpy.where(survTime < date)[0][-1]
            ubix = min(lbix + 1, lastIx)
            est = 0.5 * (survFunc.iloc[lbix, 0] + survFunc.iloc[ubix, 0])
            survProb.append(est)

    kmEstimate = pandas.DataFrame(survProb)
    kmEstimate.columns = ["kmEstimate"]
    kmEstimate.index = survivalDf.index

    pfsDf = pandas.concat([survivalDf, kmEstimate], axis=1)

    if saveFile is not None:
        pfsDf.to_csv(saveFile)

    return pfsDf
Пример #26
0
def km_curve(labels_ids, survival_dataset, tested_gene_expression_headers_columns, gene_group , k=None, label_index=None):
    """Plot one Kaplan-Meier curve per cluster and log-rank test each cluster.

    labels_ids       : sequence of clusters, each a collection of sample ids.
    survival_dataset : 2-D array with ids in column 0, durations in column 3
                       and event flags in column 4.
    tested_gene_expression_headers_columns : ids that are eligible at all.
    gene_group, k, label_index : only used to build the output file name.

    Each non-empty cluster is compared with all other eligible labels and
    with every earlier cluster; results are printed. The figure is saved
    under ``constants.BASE_PROFILE/output``.

    Returns the list of cluster-vs-rest p-values for the non-empty clusters.
    """
    ax = plt.subplot(111)

    kmf = KaplanMeierFitter()
    all_labels = np.array([y for x in labels_ids for y in x])
    sample_ids = survival_dataset[:, 0]
    # These two masks do not depend on the cluster, so compute them once.
    sample_is_tested = np.in1d(sample_ids, tested_gene_expression_headers_columns)
    label_is_tested = np.in1d(all_labels, tested_gene_expression_headers_columns)
    label_event_list = []
    label_duration_list = []
    results = []
    for i, cur_labels in enumerate(labels_ids):
        in_cluster = np.in1d(sample_ids, cur_labels) & sample_is_tested
        label_event = survival_dataset[in_cluster, 4].astype(np.int32)
        label_duration = survival_dataset[in_cluster, 3].astype(np.int32)
        label_event_list.append(label_event)
        label_duration_list.append(label_duration)

        # Complement: eligible labels that belong to any other cluster.
        labels_c = all_labels[~np.in1d(all_labels, cur_labels) & label_is_tested]
        in_complement = np.in1d(sample_ids, labels_c)
        label_event_c = survival_dataset[in_complement, 4].astype(np.int32)
        label_duration_c = survival_dataset[in_complement, 3].astype(np.int32)

        lr_results = logrank_test(label_duration, label_duration_c, label_event, label_event_c, alpha=.95)
        if len(label_duration) != 0:
            kmf.fit(list(label_duration), event_observed=list(label_event), label="cluster {} n={}, logrank pval = {}".format(i,len(label_duration), '{0:1.3e}'.format(lr_results.p_value)))
            kmf.plot(ax=ax, show_censors=True)
            # Parenthesised print works on both Python 2 and Python 3.
            print("lrank cluster {} vs all: {}".format(i, lr_results.p_value))
            results.append(lr_results.p_value)
            # Pairwise comparison against every earlier cluster.
            for j in range(len(label_duration_list) - 1):
                lr_results = logrank_test(label_duration, label_duration_list[j], label_event, label_event_list[j], alpha=.95)
                print("lrank cluster {} vs cluster {}: {}".format(i, j, lr_results.p_value))
    plt.ylim(0, 1)

    plt.title("clustering survival analysis")
    plt.savefig(os.path.join(constants.BASE_PROFILE,"output" ,"cluster_by_p_{}_{}_k={}_label_i={}_{}.png".format(constants.CANCER_TYPE, gene_group.split("/")[-1],k,label_index , time.time())))
    plt.cla()

    return results
Пример #27
0
def get_km_results(df, group_col, time_col, event_col):
    """Fit one Kaplan-Meier model per group and run a multivariate log-rank test.

    df        : DataFrame holding the three named columns; it is not modified.
    group_col : column that defines the groups.
    time_col  : duration column (cast to float).
    event_col : event column (converted to integer category codes).

    Rows with a missing value in any of the three columns are dropped.

    Returns ``(models, summary_result)``: the list of fitted models and a
    text summary of the test, or ``([], None)`` when no rows remain.
    """
    models = []
    summary_ = None
    summary_result = None
    # Work on an explicit copy so the column assignments below never write
    # into (or warn about) a slice of the caller's frame.
    df = df[[event_col, time_col, group_col]].dropna().copy()
    df[event_col] = df[event_col].astype('category').cat.codes
    df[time_col] = df[time_col].astype('float')
    if not df.empty:
        for name, grouped_df in df.groupby(group_col):
            kmf = KaplanMeierFitter()
            t = grouped_df[time_col]
            e = grouped_df[event_col]
            # format() also handles group names that are not strings
            kmf.fit(t,
                    event_observed=e,
                    label="{} (N={})".format(name, len(t)))
            models.append(kmf)

        # NOTE(review): alpha=99 is outside the 0-1 range a significance or
        # confidence level would normally take - confirm the intended value.
        summary_ = multivariate_logrank_test(df[time_col].tolist(),
                                             df[group_col].tolist(),
                                             df[event_col].tolist(),
                                             alpha=99)

    if summary_ is not None:
        summary_result = "Multivariate logrank test: pval={}, t_statistic={}".format(
            summary_.p_value, summary_._test_statistic)

    return models, summary_result
Пример #28
0
    def plot_km_estimates(self, index):
        """Plot and save Kaplan-Meier curves for the subgroup and its complement.

        Uses matplotlib figure number ``index + 1`` and sets the global
        default figure size to 15 x 6. Both ``self.sub_group`` and
        ``self.sub_group_complement`` provide 'survival_times' and 'events'.
        The figure is saved as ``<self.string_repr[0]>_model``.
        """
        rcParams['figure.figsize'] = 15, 6
        plt.figure(index + 1)
        axis = plt.subplot(111)

        cohorts = (
            ('sub_group', 'KM estimates for subgroup'),
            ('sub_group_complement', 'KM estimates for complement'),
        )
        for attribute, caption in cohorts:
            cohort = getattr(self, attribute)
            fitter = KaplanMeierFitter()
            fitter.fit(cohort['survival_times'],
                       cohort['events'],
                       label=caption,
                       alpha=UserInputs.kmf_alpha)
            fitter.plot(ax=axis)

        plt.title(self.string_repr[0] + ': ' + self.string_repr[1])
        plt.xlabel('Time')
        plt.ylabel('Survival probability')

        plt.savefig(self.string_repr[0] + '_model')
Пример #29
0
def single_submit(form):
    """Handle the single-gene form: plot low/high KM curves and render the page.

    When the form does not validate, "single.html" is rendered with the form
    errors. Otherwise the first ``Low`` percent and the last ``High`` percent
    of the rows returned by ``ReadData`` are plotted as two curves, saved to
    "static/test.png", and the page is rendered with that image.
    """
    if not form.validate_on_submit():
        return render_template("single.html",
                               form=form,
                               err=form.errors)

    database = form.DataBase.data
    Gene = form.GeneName.data
    low_pct = int(form.Low.data)
    high_pct = int(form.High.data)

    static = {}
    data, survival, static['mean'], static['std'] = ReadData(database, Gene)

    # Convert the percentages to row counts, keeping at least one row each.
    total = len(survival)
    n_low = max(int(total * low_pct / 100), 1)
    n_high = max(int(total * high_pct / 100), 1)

    # Column 1 is fitted as the durations and column 2 as the event flags.
    durations = data[:, 1]
    events = data[:, 2]

    fitter = KaplanMeierFitter()
    fitter.fit(durations[0:n_low], events[0:n_low], label=Gene + '/low')
    axis = fitter.plot()
    fitter.fit(durations[-n_high:], events[-n_high:], label=Gene + '/high')
    fitter.plot(ax=axis)
    plt.savefig("static/test.png", bbox_inches='tight')
    plt.close()

    # ``refresh`` receives a random number on every render.
    return render_template("single.html",
                           form=form,
                           image="test.png",
                           refresh=np.random.randn(),
                           static=static)
Пример #30
0
def plot_km_survf(data, t_col="t", e_col="e", datatype='train_data'):
    """
    Plot KM survival function curves and save the figure to disk.

    Parameters
    ----------
    data: pandas.DataFrame
        Survival data to plot.
    t_col: str
        Column name in data indicating time.
    e_col: str
        Column name in data indicating events or status.
    datatype: str
        Selects the output file: 'train_data' and 'test_data' each have
        their own PNG; any other value is saved as the prediction PNG.
    """
    from lifelines import KaplanMeierFitter
    from lifelines.plotting import add_at_risk_counts
    fig, ax = plt.subplots(figsize=(6, 4))
    kmfh = KaplanMeierFitter()
    kmfh.fit(data[t_col], event_observed=data[e_col], label="KM Survival Curve")
    kmfh.survival_function_.plot(ax=ax)
    plt.ylim(0, 1.01)
    plt.xlabel("Time")
    plt.ylabel("Probalities")
    plt.legend(loc="best")
    add_at_risk_counts(kmfh, ax=ax)
    # plt.show()
    # `else if` is not valid Python; the chain must use `elif`.
    if datatype == 'train_data':
        plt.savefig('/home/kyro_zhang/ZQX/train_data.png')
    elif datatype == 'test_data':
        plt.savefig('/home/kyro_zhang/ZQX/test_data.png')
    else:
        plt.savefig('/home/kyro_zhang/ZQX/predict_data.png')
Пример #31
0
def kaplan_meier_curve(
    data_df: Union[pd.DataFrame, str],
    task: str = "liver",
    threshold: Union[float, List] = 0.5,
    process_dir: str = None,
):
    """Plot one Kaplan-Meier curve per score band of ``data_df[task]``.

    Parameters
    ----------
    data_df:
        DataFrame, or path to a CSV file that is read with ``pd.read_csv``.
        The columns ``task``, "duration" and "event" are read.
    task:
        Name of the column holding the score used to split the rows.
    threshold:
        A single number or a sequence of upper bounds. A final bound of 1
        is always appended; each band is ``prev < score <= bound`` and the
        first band starts above -1.
    process_dir:
        When not None, the figure is saved there as
        "<task>_kaplan_meier.pdf".
    """
    if isinstance(data_df, str):
        data_df = pd.read_csv(data_df)

    # Always build a new list: appending to the argument would mutate the
    # caller's list (and grow it on every call). A plain int is accepted
    # the same way as a float.
    if isinstance(threshold, (int, float)):
        thresholds = [threshold, 1]
    else:
        thresholds = [*threshold, 1]

    ax = plt.subplot(111)
    kmf = KaplanMeierFitter()
    prev_threshold = -1
    for upper in thresholds:
        name = f"{task}: {prev_threshold} < y <= {upper}"
        grouped_df = data_df[(data_df[task] > prev_threshold)
                             & (data_df[task] <= upper)]

        kmf.fit(grouped_df["duration"], grouped_df["event"], label=name)
        kmf.plot(ax=ax)
        prev_threshold = upper

    plt.xlabel("Follow-up time (days)")
    plt.ylabel("Probability of survival")

    if process_dir is not None:
        plt.tight_layout()
        plt.savefig(os.path.join(process_dir, f"{task}_kaplan_meier.pdf"))
def graph(months, survival_status, has_mutation, name):
    """Show Kaplan-Meier curves for the mutated and non-mutated groups.

    ``months`` and ``survival_status`` (0 if living, 1 if dead) are put in
    a DataFrame and split with ``has_mutation``, which is used both as a
    mask and, inverted with ``~``, as its complement. Both curves are
    drawn on the same axes with confidence intervals, titled
    "<name> survival plot", and shown with ``plt.show()``.
    """
    survival_data = pd.DataFrame({
        'OS_MONTHS': months,
        'OS_STATUS': survival_status,
    })
    durations = survival_data['OS_MONTHS']
    statuses = survival_data['OS_STATUS']

    kmf = KaplanMeierFitter()

    # Group with the mutation creates the axes.
    kmf.fit(durations[has_mutation],
            statuses[has_mutation],
            label="have mutation")
    axes = kmf.plot(ci_show=True)

    # Group without the mutation is overlaid on the same axes.
    kmf.fit(durations[~has_mutation],
            statuses[~has_mutation],
            label="no mutation")
    kmf.plot(ax=axes, ci_show=True)

    plt.title('{} survival plot'.format(name))
    plt.show()
Пример #33
0
def KM_median(array,
              upper_lim_flags,
              left_censor=True,
              return_type='percentile'):
    """Fit a Kaplan-Meier estimator and report its median.

    With flags given and ``left_censor`` equal to True the data is fitted
    with ``fit_left_censoring``; in every other case a regular ``fit`` is
    used with ``upper_lim_flags`` as ``event_observed`` (which may be None).

    Returns, depending on ``return_type``:
    - 'percentile': ``(median, percentile(0.25), percentile(0.75))``
    - 'ci': ``(median, ci[0][0], ci[0][1])`` taken from the median of the
      confidence interval
    - 'median': the median alone
    Any other value returns None. The 'percentile' and 'ci' variants also
    print their values.
    """
    kmf = KaplanMeierFitter()

    if upper_lim_flags is not None and left_censor == True:
        kmf.fit_left_censoring(array, upper_lim_flags)
    else:
        # Covers both right censoring and the "no flags" case.
        kmf.fit(array, event_observed=upper_lim_flags)

    median = median_survival_times(kmf.survival_function_)

    if return_type == 'percentile':
        upper_perc = kmf.percentile(0.25)
        lower_perc = kmf.percentile(0.75)
        print(
            f'median and 1st/3rd quartiles: {median}, {lower_perc}, {upper_perc}'
        )
        return median, upper_perc, lower_perc

    if return_type == 'ci':
        median_ci = median_survival_times(kmf.confidence_interval_).values
        print(f'median and CI: {median}, {median_ci}')
        first_row = median_ci[0]
        return median, first_row[0], first_row[1]

    if return_type == 'median':
        return median

    return None
Пример #34
0
def plot_Kaplan_Meier_feature(donor_dataset):
    '''Plot, for every feature column, two Kaplan-Meier curves of the donors.

    Rows are split by whether the feature value is above the feature's
    mean over the rows with censored == 0. Each feature gets its own
    5x5 figure, shown with plt.show().

    Parameters:
    donor_dataset: Pandas dataframe containing at least the columns
    'Total_years', 'censored' and 'Baseline'.  'Total_years' is used as the
    duration and 'censored' is passed to the fitter as the event indicator.
    NOTE(review): if 'censored' is True for still-active donors, it is the
    inverse of what the fitter expects as event_observed -- confirm.

    Output:
    Kaplan-Meier plot(s).

    This function does not return anything.
    '''
    T = donor_dataset['Total_years']
    C = donor_dataset['censored']

    # Every column except the bookkeeping ones is treated as a feature;
    # remove() raises ValueError if one of them is missing.
    features = list(donor_dataset.columns)
    for column in ('Total_years', 'censored', 'Baseline'):
        features.remove(column)

    for feature in features:
        reference_mean = donor_dataset[donor_dataset['censored'] == 0][feature].mean()
        above_mean = donor_dataset[feature] > reference_mean

        ax = plt.figure(figsize=(5, 5)).add_subplot(111)
        kmf = KaplanMeierFitter()
        for mask, suffix in ((above_mean, ': Yes or > mean'),
                             (~above_mean, ': No or < mean')):
            kmf.fit(T[mask], C[mask], label=feature + suffix)
            kmf.plot(ax=ax, linewidth=2)

        ax.set_xlabel('Years', size=10)
        ax.set_ylabel('Surviving donor population', size=10)
        ax.set_xlim(0, 40)
        ax.set_ylim(0, 1)
        ax.grid()
        ax.legend(loc='upper right', fontsize=10)
        plt.show()
Пример #35
0
def plot_riskGroups(data_groups, event_col, duration_col, labels=None, plot_join=False,
                    xlabel="Survival time (Month)", ylabel="Survival Rate", legend="Risk Groups",
                    title="Survival function of Risk groups", save_fig_as=""):
    """Plot survival curve for different risk groups.

    Parameters
    ----------
    data_groups : list(`pandas.DataFrame`)
        list of DataFrame[['E', 'T']], risk groups from lowest to highest.
    event_col : str
        column in DataFrame indicating events.
    duration_col : str
        column in DataFrame indicating durations.
    labels : list(str), default None
        One text label for one group. When None or empty, the groups are
        labelled "1", "2", ... The argument is never modified.
    plot_join : bool, default False
        Also plot the union of every two adjacent risk groups.
    xlabel, ylabel, legend, title : str
        Axis labels, legend title and plot title.
    save_fig_as : str
        File name for saving the figure as PNG; nothing is saved when empty.

    Returns
    -------
    None
        Plot figure of each risk-groups.

    Examples
    --------
    >>> plot_riskGroups(df_list, "E", "T", labels=["Low", "Mid", "High"])
    """
    N_groups = len(data_groups)
    # Generate labels into a fresh list: a mutable default (or the caller's
    # list) must not be appended to, otherwise labels leak between calls.
    if not labels:
        labels = [str(i + 1) for i in range(N_groups)]

    fig, ax = plt.subplots(figsize=(8, 6))
    kmfit_groups = []

    # One curve per risk group
    for i, sub_group in enumerate(data_groups):
        kmfh = KaplanMeierFitter()
        kmfh.fit(sub_group[duration_col], event_observed=sub_group[event_col], label=labels[i])
        kmfh.survival_function_.plot(ax=ax)
        kmfit_groups.append(kmfh)

    # One extra curve per pair of adjacent groups (i, i + 1)
    if plot_join:
        for i in range(N_groups - 1):
            kmfh = KaplanMeierFitter()
            sub_group = pd.concat([data_groups[i], data_groups[i + 1]], axis=0)
            kmfh.fit(sub_group[duration_col], event_observed=sub_group[event_col],
                     label=labels[i] + '&' + labels[i + 1])
            kmfh.survival_function_.plot(ax=ax)
            kmfit_groups.append(kmfh)

    plt.ylim(0, 1.01)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend(loc="best", title=legend)
    add_at_risk_counts(*kmfit_groups, ax=ax)
    plt.show()
    if save_fig_as != "":
        fig.savefig(save_fig_as, format='png', dpi=600, bbox_inches='tight')
def kmplot(df_high, df_low):
	"""Compare the high and low groups without plotting.

	Both frames need `duration` and `event` attributes. Returns
	(logrank p-value, high@60, high@120, low@60, low@120), where the
	last four are the fitted survival predictions at times 60 and 120.
	If fitting raises ValueError, ("NA", "0", "0", "0", "0") is returned.
	"""
	high_fit = KaplanMeierFitter()
	low_fit = KaplanMeierFitter()
	try:
		high_fit.fit(
			durations = df_high.duration,
			event_observed = df_high.event,
			label = 'High: n = ' + str(len(df_high)),
		)
		low_fit.fit(
			durations = df_low.duration,
			event_observed = df_low.event,
			label = "Low: n = " + str(len(df_low)),
		)
	except ValueError:
		return("NA", "0", "0", "0", "0")

	p_value = logrank_test(
		df_high.duration,
		df_low.duration,
		event_observed_A = df_high.event,
		event_observed_B = df_low.event,
	).p_value

	# Survival predictions at the two fixed time points.
	early, late = 60, 120
	return (
		p_value,
		high_fit.predict(early),
		high_fit.predict(late),
		low_fit.predict(early),
		low_fit.predict(late),
	)
Пример #37
0
def surAnalysis(storeId):
    """Fit and save a Kaplan-Meier plot for one store.

    Reads every document of the `survival` collection whose 'store_id'
    equals `storeId`, divides each 'duration' by 86400 (presumably seconds
    to days -- confirm) and fits a Kaplan-Meier curve with 'observed' as
    the event indicator. The figure is saved under the static images
    directory using `storeId` as the file name, then closed.

    Nothing is plotted or saved when the store has no records.
    """
    duration = []
    observed = []

    for elem in survival.find({'store_id': storeId}):
        duration.append(elem['duration'] / 86400)
        observed.append(elem['observed'])

    if not duration:
        return

    kmf = KaplanMeierFitter()
    kmf.fit(array(duration), array(observed))
    ax = kmf.plot()

    figure = ax.get_figure()
    # Backslashes are escaped explicitly: the unescaped form relied on
    # invalid escape sequences (\w, \l, \s, \i), which Python warns about.
    # The resulting path is unchanged.
    figure.savefig('F:\\workshop\\lbs_lyf\\static\\images\\' + storeId)
    plt.close(figure)
Пример #38
0
def survival(time, status, pGroups=None):
    """Plot Kaplan-Meier curves and return the matplotlib axes.

    `time` and `status` are parallel sequences; an entry is used only when
    both values are different from the empty string.

    Without `pGroups`, a single red curve is fitted from index 2 onwards
    (NOTE(review): the first two entries are presumably header fields --
    confirm).

    With `pGroups`, each element is read as (label, color, indices): one
    curve per non-empty group is drawn on shared axes, and when at least
    one entry was assigned to a group the multivariate logrank p-value is
    added as a legend title in the lower left. Returns None if every group
    was empty.
    """
    def usable(i):
        # Both the time and the status value must be present.
        return time[i] != "" and status[i] != ""

    kmf = KaplanMeierFitter()

    if pGroups is None:
        rows = [i for i in range(2, len(time)) if usable(i)]
        durations = [float(time[i]) for i in rows]
        events = [int(status[i]) for i in rows]
        kmf.fit(durations, events)
        return kmf.plot(color='red')

    ax = None
    # membership[i] is the index of the group that row i ended up in,
    # or "" when the row belongs to no plotted group.
    membership = ["" for _ in time]
    for k, group in enumerate(pGroups):
        rows = [i for i in group[2] if usable(i)]
        if not rows:
            continue
        for i in rows:
            membership[i] = k
        durations = [float(time[i]) for i in rows]
        events = [int(status[i]) for i in rows]
        kmf.fit(durations, events, label = group[0])
        if ax is None:
            ax = kmf.plot(color=group[1], ci_show=False, show_censors=True)
        else:
            ax = kmf.plot(ax = ax, color=group[1], ci_show=False, show_censors=True)

    rows = [i for i, assigned in enumerate(membership) if assigned != ""]
    if rows:
        durations = [float(time[i]) for i in rows]
        events = [int(status[i]) for i in rows]
        group_ids = [int(membership[i]) for i in rows]
        from lifelines.statistics import multivariate_logrank_test
        from matplotlib.legend import Legend
        res = multivariate_logrank_test(durations, group_ids, events)
        # An empty legend is used only to display the p-value as its title.
        p_legend = Legend(ax, [], [], title = "p = %.2g" % res.p_value,
                          loc='lower left', frameon=False)
        ax.add_artist(p_legend)
    return ax
def kmplot(df_high, df_low, ax):
	"""Draw the high/low Kaplan-Meier curves on `ax` and compare them.

	The high group is drawn in red and the low group in black, both with
	censor marks and without confidence bands. The logrank p-value is
	written in the lower-right corner of the axes.

	Returns (p-value, high@60, high@120, low@60, low@120), where the last
	four are the fitted survival predictions at times 60 and 120. If
	fitting raises ValueError, ("NA", "0", "0", "0", "0") is returned and
	nothing is drawn.
	"""
	high_fit = KaplanMeierFitter()
	low_fit = KaplanMeierFitter()
	try:
		high_fit.fit(
			durations = df_high.duration,
			event_observed = df_high.event,
			label = 'High: n = ' + str(len(df_high)),
		)
		low_fit.fit(
			durations = df_low.duration,
			event_observed = df_low.event,
			label = "Low: n = " + str(len(df_low)),
		)
	except ValueError:
		return("NA", "0", "0", "0", "0")

	for fitter, line_color in ((high_fit, "red"), (low_fit, "black")):
		fitter.plot(ax = ax, color = line_color, show_censors=True, ci_show=False)

	p_value = logrank_test(
		df_high.duration,
		df_low.duration,
		event_observed_A = df_high.event,
		event_observed_B = df_low.event,
	).p_value

	ax.set_xlabel('Time (months)')
	ax.set_ylabel('Probability')
	p_text = 'logrank P = ' + ('%.4f' % p_value)
	ax.text(
		0.95, 0.02, p_text,
		verticalalignment='bottom',
		horizontalalignment='right',
		transform=ax.transAxes,
		color = 'black',
		fontsize = 11,
	)
	plt.legend(loc=3)

	# Survival predictions at the two fixed time points.
	early, late = 60, 120
	return (
		p_value,
		high_fit.predict(early),
		high_fit.predict(late),
		low_fit.predict(early),
		low_fit.predict(late),
	)
def plot_Kaplan_Meier_overall(donor_dataset):
	'''Plot one Kaplan-Meier curve over all donors.

	Parameters:
	donor_dataset: Pandas dataframe containing at least the columns
	'Total_years' and 'censored'.  'Total_years' is used as the duration
	and 'censored' is passed to the fitter as the event indicator.
	NOTE(review): if 'censored' is True for still-active donors, it is the
	inverse of what the fitter expects as event_observed -- confirm.

	Output:
	A Kaplan-Meier plot on a 5x5 figure, x axis limited to 0-40 years.

	This function does not return anything.
	'''
	durations = donor_dataset['Total_years']
	flags = donor_dataset['censored']

	# Fit first, then build the figure the curve is drawn on.
	kmf = KaplanMeierFitter()
	kmf.fit(durations, flags, label = 'Overall')

	ax = plt.figure(figsize=(5, 5)).add_subplot(111)
	kmf.plot(ax=ax)

	ax.set_xlabel('Years', size = 20)
	ax.set_ylabel('Surviving donor population', size = 20)
	ax.set_xlim(0, 40)
	ax.set_ylim(0, 1)
	ax.grid()
	ax.legend(loc = 'best', fontsize = 20)
	plt.show()
Пример #41
0
def get_sa(request):
    """Render 'sa_test.html' with Kaplan-Meier and Nelson-Aalen images.

    The two image paths are resolved relative to the parent of this
    module's directory. If either image is missing, both estimators are
    fitted on the Waltons dataset and both images are written, each from
    its own figure, before the template is rendered.
    """
    dirname = os.path.dirname(os.path.dirname(__file__)).replace('\\', '/')
    kmffile = '/images/test1.jpg'
    naffile = '/images/test2.jpg'
    context = {'kmf': kmffile, 'naf': naffile}

    kmf_path = dirname + kmffile
    naf_path = dirname + naffile

    # Regenerate when EITHER image is missing; requiring both to be missing
    # would leave the template pointing at a file that does not exist.
    if not (os.path.exists(kmf_path) and os.path.exists(naf_path)):
        df = load_waltons()
        T = df['T']  # durations
        E = df['E']  # boolean/binary flags: whether the 'death' was observed (otherwise censored)

        kmf = KaplanMeierFitter(alpha=0.95)
        kmf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='KM_estimate', alpha=None, left_censorship=False, ci_labels=None)

        naf = NelsonAalenFitter(alpha=0.95, nelson_aalen_smoothing=True)
        naf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='NA_estimate', alpha=None, ci_labels=None)

        # Save each estimate from its own figure and close it afterwards:
        # plotting both on the current axes would put the KM curve into the
        # Nelson-Aalen image, and unclosed figures accumulate per request.
        for fitter, path in ((kmf, kmf_path), (naf, naf_path)):
            figure = fitter.plot().get_figure()
            figure.savefig(path)
            plt.close(figure)

    # return render_to_response(template_name='sa_test.html', context=context, context_instance=RequestContext(request=request))
    return render(request=request, template_name='sa_test.html', context=context)
Пример #42
0
    def __init__(self, db, male=False, female=False, other=False, both=True):
        """Load all records from `db` and save the gender survival plot.

        Every document returned by `db.find()` contributes its 'duration'
        (divided by 86400), 'observed' and 'gender' values. The curve for
        all records is always drawn; the male (gender == 1), female
        (gender == 2) and other (gender == 0) curves are added only when
        the matching flag is exactly True. The figure is saved as
        'maleAndFemale'. The `both` flag is stored but not consulted here.
        """
        self.db = db
        self.male = male
        self.female = female
        self.other = other
        self.both = both

        duration, observed, group = [], [], []
        for elem in self.db.find():
            duration.append(elem['duration'] / 86400)
            observed.append(elem['observed'])
            group.append(elem['gender'])

        dura_obj = array(duration)
        obs_obj = array(observed)
        group_obj = array(group)

        # These frames are built and immediately discarded.
        DataFrame(dura_obj, index=group_obj)
        DataFrame(obs_obj, index=group_obj)

        kmf = KaplanMeierFitter()
        kmf.fit(dura_obj, obs_obj, label='both')
        ax = kmf.plot()

        # (flag, gender code, curve label) for each optional subgroup
        subgroups = (
            (self.male, 1, 'male'),
            (self.female, 2, 'female'),
            (self.other, 0, 'other'),
        )
        for enabled, code, name in subgroups:
            if enabled is True:
                mask = group_obj == code
                kmf.fit(dura_obj[mask], obs_obj[mask], label=name)
                kmf.plot(ax=ax)

        # ax.set_xlim(19,22)
        # ax.set_ylim(1,2)
        ax.get_figure().savefig('maleAndFemale')
Пример #43
0
def generate_plot():  # Perhaps `regenerate_plot`?
    """ Re-fit the Kaplan-Meier curve for the current widget selection.

    A copy of the module-level `df` is filtered by the active categories,
    the size and age ranges and the selected sexes. If no rows remain,
    only the status text is updated. Otherwise the fitted survival
    function is pushed into the renderer's data source, the x range is
    reset to [0, max days] and the status text reports the case count.
    """
    selected = df.copy()

    # Drop every category whose checkbox is not active
    inactive_labels = [category_select.labels[index]
                       for index in range(len(categories))
                       if index not in category_select.active]
    for category_label in inactive_labels:
        selected = selected[selected.category != category_label]

    # Inclusive range filters
    selected = selected[(min_size_select.value <= selected['size'])
                        & (selected['size'] <= max_size_select.value)]
    selected = selected[(min_age_select.value <= selected.age)
                        & (selected.age <= max_age_select.value)]

    # Checkbox position -> value of the `sex` column (0: male=1, 1: female=2)
    for position, sex_code in ((0, 1), (1, 2)):
        if position not in sex_select.active:
            selected = selected[selected.sex != sex_code]

    if len(selected) == 0:  # Bad constraints
        status.text = 'No cases found. Try different constraints.'
        return

    # The event of interest is "did not survive"
    doa = [not survived for survived in selected.survived]

    fit = KaplanMeierFitter().fit(selected.days, event_observed=doa,
                                  label='prob_of_surv')

    surv_func = fit.survival_function_
    renderer.data_source.data.update(x=surv_func.index,
                                     y=surv_func.prob_of_surv)

    start, end = 0, max(selected.days)
    # bounds='auto' doesn't work?
    plot.x_range.update(start=start, end=end, bounds=(start, end))
    status.text = '{} cases found.'.format(len(selected))
Пример #44
0
def plot_kmf(df, 
             condition_col, 
             censor_col, 
             survival_col, 
             threshold=None,
             title=None,
             xlabel=None,
             ax=None,
             print_as_title=False):
    """
    Plot two survival curves, splitting the rows of df on condition_col,
    and return the logrank test between the two groups.

    Without a threshold, condition_col itself is used as the mask.
    With a threshold, the mask is condition_col > threshold; the string
    'median' stands for the median of condition_col.

    Parameters
    ----------
        df: dataframe
        condition_col: string, column which contains the condition to split on
        censor_col: string, column converted to bool and used as the
                    event indicator
        survival_col: string, column which contains the survival time
        threshold: int or string, if int, condition_col is thresholded,
                                  if 'median', condition_col thresholded 
                                  at its median
        title: Title for the plot, default None
        xlabel: x-axis label, default None
        ax: an existing matplotlib ax, optional, default None
        print_as_title: bool, optional, whether or not to print the group
          sizes within the plot's title vs. stdout, default False
    """
    if threshold is None:
        mask = df[condition_col]
        with_label = '{}'.format(condition_col)
    else:
        if threshold == 'median':
            threshold = df[condition_col].median()
        mask = df[condition_col] > threshold
        with_label = '{} > {}'.format(condition_col, threshold)

    rows_with = df[mask]
    rows_without = df[~mask]

    survival_no_condition = rows_without[survival_col]
    survival_with_condition = rows_with[survival_col]
    event_no_condition = rows_without[censor_col].astype(bool)
    event_with_condition = rows_with[censor_col].astype(bool)

    kmf = KaplanMeierFitter()

    # The group without the condition is drawn first, with an empty label;
    # it also creates the axes when none were supplied.
    kmf.fit(survival_no_condition, event_no_condition, label="")
    if ax:
        kmf.plot(ax=ax, show_censors=True, ci_show=False)
    else:
        ax = kmf.plot(show_censors=True, ci_show=False)

    kmf.fit(survival_with_condition, event_with_condition, label=(with_label))
    kmf.plot(ax=ax, show_censors=True, ci_show=False)

    # Set the y-axis to range 0 to 1
    ax.set_ylim(0, 1)

    # Group sizes go to the title or to stdout, unless a title was given
    no_cond_str = "# no condition {}".format(len(survival_no_condition))
    cond_str = "# with condition {}".format(len(survival_with_condition))
    if title:
        ax.set_title(title)
    elif print_as_title:
        ax.set_title("%s | %s" % (no_cond_str, cond_str))
    else:
        for line in (no_cond_str, cond_str):
            print(line)

    if xlabel:
        ax.set_xlabel(xlabel)

    return logrank_test(survival_no_condition,
                        survival_with_condition,
                        event_observed_A=event_no_condition,
                        event_observed_B=event_with_condition)
	# NOTE(review): this fragment has no enclosing `def` line in the file;
	# `df`, `community`, `kmf`, `stats` and `results_db` must come from
	# whatever scope it originally belonged to.
	# Row subsets per gender, used for the activity-frequency statistics.
	males = df[df['gender']=='Male']
	females = df[df['gender']=='Female']

	T = df["lifetime"] # lifetime, measured in days
	C = df["dead"]

	# Boolean masks per gender, used to index T and C.
	females_ = df["gender"] == "Female"
	males_ = df["gender"] == "Male"

	community_stats = {
		'community': community,
		# count()[0] is the non-null count of the first column of each subset
		'size': females.count()[0] + males.count()[0],

		'women_frequency_median' : females['activity_freq'].median(),
		'men_frequency_median' : males['activity_freq'].median(),
		'frequency_difference_median': females['activity_freq'].median() - males['activity_freq'].median(),
		'women_frequency_mean' : females['activity_freq'].mean(),
		'men_frequency_mean' : males['activity_freq'].mean(),
		'frequency_difference_mean': females['activity_freq'].mean() - males['activity_freq'].mean(),
		# Element [1] of the Mann-Whitney result is doubled
		# (presumably one-sided p-value -> two-sided; confirm for the scipy version in use).
		'frequency_pvalue': 2* stats.mannwhitneyu(females['activity_freq'], males['activity_freq'])[1],

		# The same fitter is re-fitted for each gender; median_ is read right after each fit.
		'women_lifetime_median':kmf.fit(T[females_], event_observed=C[females_], label="Female").median_,
		'men_lifetime_median':kmf.fit(T[males_], event_observed=C[males_], label="Male").median_,
		'lifetime_pvalue': logrank_test(T[females_], T[males_], C[females_], C[males_], alpha=.95 ).p_value

	}

	# Women's median lifetime minus men's.
	community_stats['lifetime_difference_median'] = community_stats["women_lifetime_median"] - community_stats["men_lifetime_median"]

	# Store the statistics document for this community.
	results_db.insert( community_stats )
Пример #46
0
# Censor every observation at a common length of 30, repeated for all N cases
cutoff = np.repeat(30, N)

# Observed duration is the event time capped at the cutoff
duration = np.minimum(event_t, cutoff)

# 1 where the event happened within the cutoff, 0 where it was censored
not_censor = (event_t <= duration).astype(int)

# Convert to data frame
data = pd.DataFrame({
    'duration': duration,
    'event': not_censor,
    'age': age,
    'college': college,
})

# Plot observations with censoring
# plot_lifetimes(duration, event_observed = not_censor)

# Kaplan Meier Summary for Simulated Data
from lifelines import KaplanMeierFitter
kmf = KaplanMeierFitter()
kmf.fit(duration, event_observed=not_censor)
kmf.survival_function_.plot()

# Cox-PH Model Regression
from lifelines import CoxPHFitter
cf = CoxPHFitter()
cf.fit(data, 'duration', event_col='event')
cf.print_summary()

## Get Predictions from Model ##

# 24 year old college grad
#college_24 = pd.DataFrame({'age':[24], 'college':[1]})
#cf.predict_survival_function(college_24).plot()

# 65 year old high school grad
def get_kmf_fit(qs):
    """Return a KaplanMeierFitter fitted on the rows of `qs`.

    'days_since_complaint' supplies the durations and 'is_closed' the
    event indicator; both are read with `values_list(..., flat=True)`.
    """
    durations = qs.values_list('days_since_complaint', flat=True)
    closed_flags = qs.values_list('is_closed', flat=True)

    fitter = KaplanMeierFitter()
    fitter.fit(durations, event_observed=closed_flags)
    return fitter
Пример #48
0
def _plot_kmf_single(df,
                     condition_col,
                     survival_col,
                     censor_col,
                     threshold,
                     title,
                     xlabel,
                     ylabel,
                     ax,
                     with_condition_color,
                     no_condition_color,
                     with_condition_label,
                     no_condition_label,
                     color_map,
                     label_map,
                     color_palette,
                     ci_show,
                     print_as_title):
    """
    Helper function to produce a single KM survival plot, among observations in df by groups defined by condition_col.

    All inputs are required - this function is intended to be called by `plot_kmf`.

    Groups are derived as follows:
    - threshold given ('median' resolves to the column median): two groups,
      values > threshold versus the rest;
    - object or categorical column: one group per distinct value;
    - bool column: True versus False.
    Any other dtype raises ValueError.

    label_map / color_map, when supplied, override the generated labels
    and colors; they are keyed by group value.

    Returns the logrank test result for two groups, a NullSurvivalResults
    for one or no group, or the object returned by CoxPHFitter.fit for
    more than two groups. In every case the attributes
    `survival_data_series`, `event_data_series` and `desc` are attached.
    """
    # make color inputs consistent hex format
    if colors.is_color_like(with_condition_color):
        with_condition_color = colors.to_hex(with_condition_color)
    if colors.is_color_like(no_condition_color):
        no_condition_color = colors.to_hex(no_condition_color)
    ## prepare data to be plotted; producing 3 outputs:
    # - `condition`, series containing category labels to be plotted
    # - `label_map` (mapping condition values to plot labels)
    # - `color_map` (mapping condition values to plotted colors)
    if threshold is not None:
        is_median = threshold == "median"
        if is_median:
            threshold = df[condition_col].median()
        label_suffix = float_str(threshold)
        condition = df[condition_col] > threshold
        default_label_no_condition = "%s ≤ %s" % (condition_col, label_suffix)
        if is_median:
            label_suffix += " (median)"
        default_label_with_condition = "%s > %s" % (condition_col, label_suffix)
        with_condition_label = with_condition_label or default_label_with_condition
        no_condition_label = no_condition_label or default_label_no_condition
        if not label_map:
            label_map = {False: no_condition_label,
                         True: with_condition_label}
        if not color_map:
            color_map = {False: no_condition_color,
                         True: with_condition_color}
    elif df[condition_col].dtype == 'O' or df[condition_col].dtype.name == "category":
        condition = df[condition_col].astype("category")
        if not label_map:
            label_map = {
                condition_value: '{} = {}'.format(condition_col, condition_value)
                for condition_value in condition.unique()
            }
        if not color_map:
            rgb_values = sb.color_palette(color_palette, len(label_map.keys()))
            hex_values = [colors.to_hex(col) for col in rgb_values]
            color_map = dict(zip(label_map.keys(), hex_values))
    elif df[condition_col].dtype == 'bool':
        condition = df[condition_col]
        default_label_with_condition = "= {}".format(condition_col)
        default_label_no_condition = "¬ {}".format(condition_col)
        with_condition_label = with_condition_label or default_label_with_condition
        no_condition_label = no_condition_label or default_label_no_condition
        if not label_map:
            label_map = {False: no_condition_label,
                         True: with_condition_label}
        if not color_map:
            color_map = {False: no_condition_color,
                         True: with_condition_color}
    else:
        # A backslash continuation inside the literal used to embed the next
        # line's indentation in the message; build it on one line instead.
        raise ValueError(
            "Don't know how to plot data of type {}".format(df[condition_col].dtype))

    # produce kmf plot for each category (group) identified above
    kmf = KaplanMeierFitter()
    grp_desc = list()
    grp_survival_data = dict()
    grp_event_data = dict()
    grp_names = list(condition.unique())
    for grp_name, grp_df in df.groupby(condition):
        grp_survival = grp_df[survival_col]
        grp_event = (grp_df[censor_col].astype(bool))
        grp_label = label_map[grp_name]
        grp_color = color_map[grp_name]
        kmf.fit(grp_survival, grp_event, label=grp_label)
        desc_str = "# {}: {}".format(grp_label, len(grp_survival))
        grp_desc.append(desc_str)
        grp_survival_data[grp_name] = grp_survival
        grp_event_data[grp_name] = grp_event
        if ax:
            ax = kmf.plot(ax=ax, show_censors=True, ci_show=ci_show, color=grp_color)
        else:
            ax = kmf.plot(show_censors=True, ci_show=ci_show, color=grp_color)

    ## format the plot
    # Set the y-axis to range 0 to 1
    ax.set_ylim(0, 1)
    # Relabel the y ticks as whole percentages
    y_tick_vals = ax.get_yticks()
    ax.set_yticklabels(["%d" % int(y_tick_val * 100) for y_tick_val in y_tick_vals])
    # plot title
    if title:
        ax.set_title(title)
    elif print_as_title:
        ax.set_title(' | '.join(grp_desc))
    else:
        for desc in grp_desc:
            print(desc)
    # axis labels
    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)

    ## summarize analytical version of results
    ## again using same groups as are plotted
    if len(grp_names) == 2:
        # use log-rank test for 2 groups
        results = logrank_test(grp_survival_data[grp_names[0]],
                               grp_survival_data[grp_names[1]],
                               event_observed_A=grp_event_data[grp_names[0]],
                               event_observed_B=grp_event_data[grp_names[1]])
    elif len(grp_names) <= 1:
        # no analytical result for 1 or 0 groups
        results = NullSurvivalResults()
    else:
        # cox PH fitter for >2 groups
        cf = CoxPHFitter()
        cox_df = patsy.dmatrix('+'.join([condition_col, survival_col,
                                         censor_col]),
                               df, return_type='dataframe')
        del cox_df['Intercept']
        results = cf.fit(cox_df, survival_col, event_col=censor_col)
        results.print_summary()
    # add metadata to results object so caller can print them
    results.survival_data_series = grp_survival_data
    results.event_data_series = grp_event_data
    results.desc = grp_desc
    return results
Пример #49
0
from lifelines import KaplanMeierFitter
from lifelines.datasets import load_waltons

# Load the example data frame and show its first rows
df = load_waltons()
print(df.head())

# Separate series for the durations (T) and the event flags (E)
T = df['T']
E = df['E']

# Fit one curve over all rows; kmf.fit(T, E) is the shorter equivalent
kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=E)

# Fitted survival table, median, and the plot
kmf.survival_function_
kmf.median_
kmf.plot()

# Multiple groups: select the 'miR-137' rows and plot the remaining
# rows as the control group
groups = df['group']
ix = groups == 'miR-137'

kmf.fit(T[~ix], E[~ix], label='control')
ax = kmf.plot()
Пример #50
0
# Collect duration, event flag and gender from every `after_users` document
duration, observed, group = [], [], []

for elem in after_users.find():
    # if elem['duration'] >= 1500000:
    duration.append(elem['duration'] / 86400)
    observed.append(elem['observed'])
    group.append(elem['gender'])

dura_obj = array(duration)
obs_obj = array(observed)
group_obj = array(group)

# These frames are built and immediately discarded
DataFrame(dura_obj, index=group_obj)
DataFrame(obs_obj, index=group_obj)

# Boolean masks by gender code
male = group_obj == 1
female = group_obj == 2
other = group_obj == 0

# Male, female and combined curves on shared axes
kmf = KaplanMeierFitter()
kmf.fit(dura_obj[male], obs_obj[male], label='male')
ax = kmf.plot()
kmf.fit(dura_obj[female], obs_obj[female], label='female')
kmf.plot(ax=ax)
kmf.fit(dura_obj, obs_obj, label='both')
kmf.plot(ax=ax)
# kmf.fit(dura_obj[other], obs_obj[other], label='other')
# kmf.plot(ax=ax)
# ax.set_xlim(19, 22)
# ax.set_ylim(1, 2)
ax.get_figure().savefig('maleAndFemale_both_17day')
# Expected first rows of the data frame are shown in the string below
print(df.head())
'''
    T  E    group
0   6  1  miR-137
1  13  1  miR-137
2  13  1  miR-137
3  13  1  miR-137
4  19  1  miR-137
'''

# Durations, event flags and the group each row belongs to
T = df['T']
E = df['E']
groups = df['group']

# True for the 'miR-137' rows; every other row is treated as control
ix = groups == 'miR-137'

kmf = KaplanMeierFitter()

# Control curve creates the axes
kmf.fit(T[~ix], E[~ix], label='control')
ax = kmf.plot()

# miR-137 curve is overlaid on the same axes
kmf.fit(T[ix], E[ix], label='miR-137')
kmf.plot(ax=ax)

plt.ylabel('Survival Probability')
plt.show()

# Compare the two curves with a logrank test
results = logrank_test(
    T[ix],
    T[~ix],
    event_observed_A=E[ix],
    event_observed_B=E[~ix],
)
results.print_summary()
Пример #52
0
def plot_survival(unique_groups, grouped_data, analysis_type, censors, ci, showplot, stat_results, time='Months'):
    """Draw one Kaplan-Meier curve per group and log survival-time quantiles.

    For each group a ``KaplanMeierFitter`` is fitted on the group's
    ``'survival'`` and ``'event'`` columns and drawn on a shared axes. A row of
    ``qth_survival_times`` values for q = 0.95, 0.90, ..., 0.00 is appended to
    ``Kaplan_<analysis_type>.txt``. The figure is saved as
    ``Kaplan_<analysis_type>.png`` and ``Kaplan_<analysis_type>.eps``.

    Args:
        unique_groups: Sequence of group keys; also used for the legend text.
        grouped_data: Object whose ``get_group(key)`` returns the rows of one
            group (presumably a pandas GroupBy -- confirm against the caller).
        analysis_type: Used in the output file names; ``'survival'`` selects
            the "Overall Survival" y-label, anything else "Relapse-Free
            Survival".
        censors: Forwarded to ``kmf.plot`` as ``show_censors``.
        ci: Forwarded to ``kmf.plot`` as ``ci_show``.
        showplot: When equal to ``True`` the figure is shown before closing.
        stat_results: Object with a ``p_value`` attribute; only read when
            there are exactly two groups.
        time: ``'months'`` or ``'years'`` (case-insensitive) divides the
            ``'survival'`` column by 365/12 or 365; any other value leaves it
            unchanged.
    """
    kmf = KaplanMeierFitter()
    fig, ax = plt.subplots()
    n_in_groups = []

    # Loop-invariant divisor for the 'survival' column. Float literals keep
    # the result identical on Python 2, where 365/12 would truncate to 30.
    unit = time.lower()
    if unit == 'months':
        divisor = 365 / 12.0
    elif unit == 'years':
        divisor = 365.0
    else:
        divisor = None

    percents = list(range(95, -1, -5))

    # The context manager guarantees the quantile file is flushed and closed,
    # even if fitting or plotting raises part-way through.
    with open('Kaplan_%s.txt' % (analysis_type), 'a') as f:
        f.write("\nPercent %s\n" % analysis_type)
        headers = "Group\t" + "".join(str(x) + "%\t" for x in percents)
        f.write("%s\n" % headers)

        for group in unique_groups:
            data = grouped_data.get_group(group)
            n_in_groups.append(len(data))
            if divisor is None:
                survival_time = data['survival']
            else:
                survival_time = data['survival'] / divisor
            kmf.fit(survival_time, data['event'], label=group)

            f.write("%s\t" % group)
            for x in percents:
                # x / 100.0: plain x/100 is integer division on Python 2 and
                # would request q=0 for every column.
                f.write(str(qth_survival_times(x / 100.0, kmf.survival_function_)) + "\t")
            f.write("\n")

            kmf.plot(ax=ax, show_censors=censors, ci_show=ci, linewidth=2.5)

    # Make the graph pretty!
    labels = dict(horizontalalignment='center', verticalalignment='center', fontname='Arial', fontsize=28)

    ax.grid(False)
    ax.set_ylim(0, 1.05)
    for side in ('left', 'right', 'top', 'bottom'):
        ax.spines[side].set_linewidth(2.5)
    ax.yaxis.set_tick_params(width=2.5)
    ax.xaxis.set_tick_params(width=2.5)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    plt.xlabel('%s Post-Diagnosis' % time, labels, labelpad=20)
    if analysis_type == 'survival':
        plt.ylabel('Overall Survival', labels, labelpad=20)
    else:
        plt.ylabel('Relapse-Free Survival', labels, labelpad=20)
    plt.xticks(fontname='Arial', fontsize=24)
    plt.yticks(fontname='Arial', fontsize=24)
    ax.tick_params(axis='y', pad=10)
    ax.tick_params(axis='x', pad=10)

    # Relabel each legend entry with its group name and sample size.
    legend = ax.legend(frameon=False, loc=3)
    for text, group, count in zip(legend.get_texts(), unique_groups, n_in_groups):
        text.set_fontsize(20)
        text.set_text('%s   n=%d' % (group, count))

    # A single p-value is only annotated for a two-group comparison.
    if len(unique_groups) == 2:
        plt.text(0.95, 0.05, 'p = %.2g' % (stat_results.p_value), fontname='Arial', fontsize=20, ha='right', transform=ax.transAxes)

    plt.tight_layout()

    fig.savefig('Kaplan_%s.png' % analysis_type, transparent=True)
    fig.savefig('Kaplan_%s.eps' % analysis_type, transparent=True)
    # Equality (not truthiness) is kept so callers passing non-bool flags see
    # exactly the behaviour they had before.
    if showplot == True:
        plt.show()
    plt.close(fig)
Пример #53
0
    def data_fit(self):
        """Fit a pooled Kaplan-Meier curve of subscription lifetimes and save it.

        Every distinct ``FromUserName`` that has a ``'subscribe'`` event in
        ``self.hyd_events`` becomes one observation:

        * start  = that user's smallest ``CreateTime``;
        * end    = that user's largest ``CreateTime`` if the user has an
          ``'unsubscribe'`` event (observed = 1), otherwise the current time
          (observed = 0, i.e. censored);
        * duration = ``abs(end - start) // 86400`` (whole days, presumably
          from timestamps in seconds).

        The curve labelled ``'both'`` is written to the file ``maleAndFemale``.
        Nothing is fitted or saved when there are no subscribed users.
        """
        self.hyd_events.create_index('FromUserName')
        self.hyd_events.create_index('Event')
        self.hyd_users.create_index('openid')

        # A set removes users that subscribed more than once.
        user_list = list({elem['FromUserName']
                          for elem in self.hyd_events.find({'Event': 'subscribe'})})
        print(len(user_list))
        now_time = time.time()

        duration = []
        observed = []
        group = []

        for user in user_list:
            # Rebuilt for every user: a list shared across iterations would
            # make min()/max() span the events of all users seen so far.
            time_block = [item['CreateTime']
                          for item in self.hyd_events.find({'FromUserName': user})]
            sub_time = int(min(time_block))

            unsubscribed = self.hyd_events.find_one(
                {'$and': [{'FromUserName': user}, {'Event': 'unsubscribe'}]})
            if unsubscribed is None:
                # Still subscribed: censor the observation at "now".
                unsub_time = int(now_time)
                was_observed = 0
            else:
                unsub_time = int(max(time_block))
                was_observed = 1

            # If gender exists use it; otherwise 0, which means gender unknown.
            profile = self.hyd_users.find_one({'openid': user})
            try:
                gender = profile['sex']
            except (TypeError, KeyError):
                # TypeError: no user document; KeyError: document has no 'sex'.
                gender = 0

            # Floor division keeps the whole-day durations that the original
            # Python 2 integer division produced.
            duration.append(abs(unsub_time - sub_time) // 86400)
            observed.append(was_observed)
            # NOTE(review): gender is collected but not used by the pooled fit
            # below; kept so per-gender curves can be added without re-querying.
            group.append(gender)

        if not duration:
            return

        # Fit once on the complete data set, after the loop, so the saved
        # figure holds a single curve instead of one overplotted curve per user.
        kmf = KaplanMeierFitter()
        kmf.fit(array(duration), array(observed), label='both')
        ax = kmf.plot()
        ax.get_figure().savefig('maleAndFemale')
Пример #54
0
		return t
	elif is_number(c['year_of_birth']) == True and is_number(c['age_at_diagnosis']) == True and is_number(c['days_to_death']) == False:
		t = 2018 - float(c['year_of_birth']) - (float(c['age_at_diagnosis'])*4/(365*3 + 366))
		return t
	else:
		return "NotApplicable"

# Derive per-row 'duration' and 'event' columns with the row-wise helper
# functions, then keep only the sample barcode and the two new columns.
matrix['duration'] = matrix.apply(duration, axis = 1)
matrix['event'] = matrix.apply(event, axis = 1)
matrix = matrix[['bcr_sample_barcode', 'duration', 'event']]
#new_header = matrix.iloc[0] #grab the first row for the header
#matrix = matrix[1:] #take the data less the header row
#matrix.columns = new_header
# Drop rows for which duration() returned its "NotApplicable" sentinel string.
# NOTE(review): the column may still have object dtype afterwards -- confirm
# that the fit below receives numeric durations.
matrix = matrix[matrix['duration']!="NotApplicable"]


# Fit a single Kaplan-Meier curve over all remaining rows.
kmf = KaplanMeierFitter()
kmf.fit(durations = matrix.duration, event_observed = matrix.event)

# Bare attribute read: only displays something in an interactive session.
kmf.survival_function_

# Plot the KM estimate
kmf.plot()
# Add title and y-axis label
plt.title("The Kaplan-Meier Estimate for BRCA (total)")
plt.ylabel("Probability a patient is still active")

plt.show()


Пример #55
0
Файл: km.py Проект: xcodevn/SADP
EPS_LIST = [0.05,0.1,0.2,0.4,0.8,1.6]

bins0 = config.BIN0
bins1 = config.BIN1

df = pd.read_stata("wichert.dta")
# Pairs of (time scaled by the largest time, event flag as int).
# list(...) matters on Python 3: zip() is a one-shot iterator there, so it
# would be exhausted by the filter below and has no len() for the report.
data_ = list(zip(df.time/max(df.time), df.event.astype(int)))
# Keep only observations whose scaled time is at least config.GAMMA.
data  = [(a, b) for (a,b) in data_ if a >= config.GAMMA]

print("[*] Remove #%d outliers" % (len(data_) - len(data)))
# NOTE(review): N counts the rows of df, i.e. before the GAMMA filter.
N  = len(df) # number of data points

kmf = KaplanMeierFitter()
# Transpose the (time, event) pairs back into two parallel sequences.
(T, E) = zip(*data)
kmf.fit(T, event_observed=E)
#naf = NelsonAalenFitter()
#naf.fit(T, event_observed=E)
#ax = pyplot.subplot(121)
#naf.plot(ax=ax)

#ax = pyplot.subplot(122)
#kmf.plot(ax=ax)

# Survival-function values of the fit on the filtered data.
true_value =  kmf.survival_function_.values
#naf.cumulative_hazard_.to_csv("naf.csv")

#pyplot.show()

# Scaled times split by event flag: data0 has flag 0, data1 has flag 1.
data0  = [ a for (a,b) in data if b == 0 ]
data1  = [ a for (a,b) in data if b == 1 ]
Пример #56
0
def execute():
    """Plot Kaplan-Meier survival curves for the most common group categories.

    Reads incident duration, subject survival, group category and group size
    from the ISRID SQLite database. For each of the four most frequent
    categories it draws two curves on one panel of a 2x2 grid: one for rows
    whose group size is greater than 1 ("Groups") and one for rows whose group
    size equals 1 ("Singles"). The grid is saved to ``km-grid-large.svg``; a
    second figure overlaying the collected fitters is saved to
    ``km-combined-large.svg``. Both figures are then shown.
    """
    matplotlib.rc("font", size=20)

    engine, session = database.initialize("sqlite:///../data/isrid-master.db")
    try:
        # Query with Group.size may take awhile, at least for Charles
        # Not sure why
        query = session.query(Incident.total_hours, Subject.survived, Group.category, Group.size).join(Group, Subject)
        print("Tabulating query... may take awhile for unknown reasons.")
        df = tabulate(query)
        print("Done tabulating.")
        print(df.describe())
    finally:
        # Release the connection even when the query or tabulation fails.
        database.terminate(engine, session)

    # days: incident length converted from a timedelta to fractional days.
    # doa:  the event indicator, True when the subject did not survive.
    df = df.assign(
        days=[total_hours.total_seconds() / 3600 / 24 for total_hours in df.total_hours],
        doa=[not survived for survived in df.survived],
    )
    df = df[0 <= df.days]

    rows, columns = 2, 2
    grid, axes = plt.subplots(rows, columns, figsize=(15, 10))

    categories = Counter(df.category)
    kmfs = []
    options = {"show_censors": True, "censor_styles": {"marker": "|", "ms": 6}, "censor_ci_force_lines": False}

    top_categories = categories.most_common()[: rows * columns]
    for plot, (category, _count) in enumerate(top_categories):
        print("Category:", category)
        ax = axes[plot // columns, plot % columns]
        df_ = df[df.category == category]
        N, Ndoa = len(df_), sum(df_.doa)
        Srate = 100 * (1 - Ndoa / N)
        # Index by name: `df_.size` is the DataFrame's element-count property,
        # not the "size" column, so attribute access cannot select it.
        group_size = df_["size"]
        grp = df_[group_size > 1]
        sng = df_[group_size == 1]
        kmf = KaplanMeierFitter()
        kmf.fit(grp.days, event_observed=grp.doa, label=category + " Groups")
        kmf.plot(ax=ax, **options)
        kmf.fit(sng.days, event_observed=sng.doa, label=category + " Singles")
        kmf.plot(ax=ax, **options)
        # NOTE(review): the fitter was refitted above, so only the "Singles"
        # fit of each category reaches the combined figure -- confirm intent.
        kmfs.append(kmf)

        ax.set_xlim(0, min(30, 1.05 * ax.get_xlim()[1]))
        ax.set_ylim(0, 1)
        ax.set_title("{}, N = {}, DOA = {}, {:.0f}% surv".format(category, N, Ndoa, Srate))
        ax.set_xlabel("Total Incident Time (days)")
        ax.set_ylabel("Probability of Survival")

    grid.suptitle("Kaplan-Meier Survival Curves", fontsize=25)
    grid.tight_layout()
    grid.subplots_adjust(top=0.9)
    grid.savefig("../doc/figures/kaplan-meier/km-grid-large.svg", transparent=True)

    # Second figure: every collected fitter overlaid on a single axes.
    combined = plt.figure(figsize=(15, 10))
    ax = combined.add_subplot(1, 1, 1)
    for kmf in kmfs[: rows * columns]:
        kmf.plot(ci_show=False, show_censors=True, censor_styles={"marker": "|", "ms": 6}, ax=ax)

    ax.set_xlim(0, 15)
    ax.set_ylim(0, 1)
    ax.set_xlabel("Total Incident Time (days)")
    ax.set_ylabel("Probability of Survival")
    ax.set_title("Kaplan-Meier Survival Curves", fontsize=25)
    ax.grid(True)
    combined.savefig("../doc/figures/kaplan-meier/km-combined-large.svg", transparent=True)

    plt.show()
Пример #57
0
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import scipy

import lifelines

# NOTE(review): figsize is not imported in this snippet; presumably it comes
# from the IPython pylab environment -- confirm before running as a script.
figsize(12.5,5)
np.set_printoptions(precision=2, suppress=True)

from lifelines import KaplanMeierFitter
# Five observations; events[i] is False where the observation is censored.
survival_times = np.array([0.,3.,4.5, 10., 1.])
events = np.array([False, True, True, False, True])

kmf = KaplanMeierFitter()
kmf.fit(survival_times, event_observed=events)

# Python 2 print statements: show the fitted curve and its median.
print kmf.survival_function_
print kmf.median_
kmf.plot()


## Example 2
import matplotlib.pylab as plt
# IPython magic; this line is not valid in a plain Python script.
%pylab

figsize(12.5,6)
from lifelines.plotting import plot_lifetimes
from numpy.random import uniform, exponential
N = 25
current_time = 10
#Griffin Calme
#Group 15, week 8 activity
#Kaplan Meier survival curve

import pandas as pd
from lifelines import KaplanMeierFitter
import matplotlib.pyplot as plt
kmf = KaplanMeierFitter()

# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
# read_csv with index_col=0 and parse_dates=True is its documented equivalent
# (first column becomes the index and is parsed as dates where possible).
df = pd.read_csv('wk8gp15KapMeier.csv', index_col=0, parse_dates=True)

print(df)

# Boolean mask: True for rows belonging to group 2.
groups = df['Group']
ix = (groups == 2)

T = df['SERIAL TIME (years)']
E = df['STATUS']

# Every row outside group 2 is plotted under the label '1'; this curve
# creates the axes.
kmf.fit(T[~ix], E[~ix], label='1')
ax = kmf.plot()

# Group 2 is drawn onto the same axes for comparison.
kmf.fit(T[ix], E[ix], label='2')
kmf.plot(ax=ax, ci_force_lines=False)

plt.show()
Пример #59
0
import matplotlib.pyplot as plt
from lifelines import KaplanMeierFitter

df = pd.read_csv(
    'joined.csv.bz2',
    sep=',',
    compression='bz2',
    low_memory=False,
)

# Convert 'term' to an integer. str.strip removes any of the characters in
# ' months' from both ends of the value; it does not remove a literal suffix.
df['term'] = df['term'].map(lambda raw: int(raw.strip(' months')))

# 'T' is 'firstMissed' divided by 'term'; rows whose loan_status is
# 'Fully Paid' are set to 1.
fully_paid = df['loan_status'] == 'Fully Paid'
df['T'] = df['firstMissed'] / df['term']
df.loc[fully_paid, 'T'] = 1

# The event indicator is taken to be the negation of the 'censored' column.
# NOTE(review): `~` is a logical NOT only for a boolean column -- confirm the
# dtype of 'censored'.
T, E = df['T'], ~df['censored']

kmf = KaplanMeierFitter()
kmf.fit(T, E)

# Bare attribute reads: they only display something in an interactive session.
kmf.survival_function_
kmf.median_
kmf.plot()
plt.show()