Exemplos de CoxPHFitter.fit em Python, exemplos de lifelines.CoxPHFitter.fit em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: cumulative_incidence.py Projeto: dsgelab/risteys

def comp_cph(endpoint, sex, df_events, df_info):
    """Build the case/control table for one endpoint and fit a Cox PH model.

    Cases are the rows of ``df_events`` whose ``ENDPOINT`` equals
    ``endpoint``; the population is every row of ``df_info`` whose ``SEX``
    equals ``sex``.

    Returns:
        ``(dfcox, cph)``: the two-column frame (``outcome``, ``duration``)
        that was handed to the fitter, and the fitted ``CoxPHFitter``.

    Raises:
        NotEnoughCases: when fewer than ``MIN_CASES`` events match.
    """
    logger.info(f"{endpoint} - {sex} - Computing cumulative incidence")
    logger.debug(f"{endpoint} - {sex} - Assigning cases and controls")

    # Cases: events recorded for the requested endpoint.
    is_endpoint = df_events.ENDPOINT == endpoint
    df_cases = df_events.loc[is_endpoint, ["FINNGENID", "ENDPOINT_AGE"]]
    if len(df_cases) < MIN_CASES:
        raise NotEnoughCases(f"Not enough cases (< {MIN_CASES}).")

    # Population: everyone of the requested sex (this is also how
    # sex-specific endpoints are handled), left-joined with the cases.
    is_sex = df_info.SEX == sex
    df_all = df_info.loc[is_sex, ["FINNGENID", "FU_END_AGE"]].merge(
        df_cases, how="left", on="FINNGENID")

    # The left merge leaves ENDPOINT_AGE as NaN for controls.
    df_all["outcome"] = df_all.ENDPOINT_AGE.notna()

    # Follow-up ends at FU_END_AGE, except for cases where it ends at the event.
    df_all["duration"] = df_all.FU_END_AGE
    case_rows = df_all.outcome
    df_all.loc[case_rows, "duration"] = df_all.loc[case_rows, "ENDPOINT_AGE"]

    # Keep only the two model columns so cph.fit() sees no extra covariates.
    dfcox = df_all[["outcome", "duration"]]

    logger.debug(f"{endpoint} - Fitting Cox model")
    cph = CoxPHFitter()
    cph.fit(dfcox, duration_col="duration", event_col="outcome")

    return dfcox, cph

Exemplo n.º 2

0

Exibir arquivo

Arquivo: cox_model.py Projeto: fairynn/covid-19-mortality

def main(data_df):
    """Binarize thresholded covariates, fit a Cox PH model and print its summary.

    Every column named in the module-level ``th_dict`` is overwritten in
    ``data_df`` (the caller's frame is modified) with 1 where the value is
    ``>=`` its threshold and 0 otherwise.  A combined ``V-HU`` covariate is
    then built as the sum of the ``HU_of_consolidation`` and
    ``Volume_of_total_pneumonia_infection`` columns.

    Args:
        data_df: DataFrame holding ``Duration``, ``Death`` and the covariate
            columns referenced below.
    """
    for key, threshold in th_dict.items():
        # Columns whose name mentions "HU" keep their NaNs (a NaN then maps
        # to 0 because ``NaN >= threshold`` is False); every other column has
        # missing values replaced by 0 first.  The previous test,
        # ``not key.find("HU") > 0``, was also true for names that *start*
        # with "HU" (find() returns 0), so those columns were filled anyway.
        if "HU" not in key:
            data_df[key] = data_df[key].fillna(0)
        data_df[key] = data_df[key].map(
            lambda value: 1 if value >= threshold else 0)

    # Sum of the two columns; takes values 0, 1 or 2 when both were
    # binarized above (presumably both are keys of th_dict - confirm).
    v_hu = (data_df['HU_of_consolidation']
            + data_df['Volume_of_total_pneumonia_infection']).rename("V-HU")

    # White_blood_cell_count, Neutrophil_count and D-dimer are deliberately
    # left out of the model.
    covariate_names = [
        "Duration",
        "Death",
        "Age",
        "Blood_Oxygen",
        "C-Reactive_protein",
        "Lymphocyte_count",
        "Cerebrovascular_Disease",
        "Sex",
        "Lactic_dehydrogenase",
    ]
    combinations_df = pd.concat(
        [data_df[name] for name in covariate_names] + [v_hu], axis=1)

    cph = CoxPHFitter()
    cph.fit(combinations_df, "Duration", event_col="Death", step_size=0.01)

    cph.print_summary()

Exemplo n.º 3

0

Exibir arquivo

def f(train, threshold, test):
    """Score genes on ``train``, project ``test`` onto the first PC and fit a Cox model.

    Genes whose score from ``h(train)`` exceeds 1 are selected, standardised
    with statistics learned on ``train``, and reduced to a single principal
    component.  The test-set component is stored in column ``w`` and used as
    the only covariate of a Cox PH model fitted on the test set.

    NOTE: ``threshold`` is accepted but not used; the cutoff is the literal 1.

    Returns:
        Whatever ``cph.print_summary()`` returns.
    """
    # Gene scores, indexed 1..21148 so an index value n maps to column 'Vn'.
    score_table = pd.DataFrame(h(train), index=np.array(range(1, 21149)))
    selected_ids = score_table.index[score_table.iloc[:, 0] > 1].tolist()
    candidate_genes = ['V{0}'.format(gene_id) for gene_id in selected_ids]

    # Standardise using the training statistics only.
    scaler = preprocessing.StandardScaler()
    train_scaled = scaler.fit_transform(train.loc[:, candidate_genes])
    test_scaled = scaler.transform(test.loc[:, candidate_genes])

    # One principal component, fitted on train and applied to test.
    pca = sklearnPCA(n_components=1)
    train_component = pca.fit_transform(train_scaled)
    test_component = pca.transform(test_scaled)
    eigen_val = pca.explained_variance_  # explained variance of the component

    # Attach the PCA score to the test set.
    test = test.assign(w=pd.Series(np.ones(len(test.patient_id))))
    test['w'] = test_component
    survival_columns = ['event_free_survival_time_days', 'death', 'w']
    testset_surv = test[survival_columns]

    # Cox proportional hazards regression on the test set.
    cph = CoxPHFitter()
    cph.fit(testset_surv, 'event_free_survival_time_days', event_col='death')

    return cph.print_summary()

Exemplo n.º 4

0

Exibir arquivo

def coxcalc(df, x, survivaltime, status):
    """Fit a single-covariate Cox PH model and return its summary table.

    Args:
        df: source DataFrame; it is not modified.
        x: name of the covariate column (converted with ``pd.to_numeric``).
        survivaltime: name of the duration column.
        status: name of the event-indicator column.

    Returns:
        ``cph.summary`` of the model fitted on the rows that have no missing
        value in any of the three columns.
    """
    # Take an explicit copy: assigning into a column-subset of ``df`` raises
    # pandas' SettingWithCopyWarning and is not guaranteed to take effect.
    model_df = df[[status, survivaltime, x]].copy()
    model_df[x] = pd.to_numeric(model_df[x])
    model_df = model_df.dropna()

    cph = CoxPHFitter()
    cph.fit(model_df, duration_col=survivaltime, event_col=status, show_progress=False)
    return cph.summary

Exemplo n.º 5

0

Exibir arquivo

Arquivo: test_statistics.py Projeto: liuylu5/lifelines

def test_proportional_hazard_test_with_weights():
    """Compare the rank-transform PH test statistic of a weighted fit with R.

    Reference R session::

        library(survival)
        df <- data.frame(
          "var1" = c(0.209325, 0.693919, 0.443804, 0.065636, 0.386294),
          "T" = c(5.269797, 6.601666, 7.335846, 11.684092, 12.678458),
          "E" = c(1, 1, 1, 1, 1),
          "w" = c(1, 0.5, 2, 1, 1)
        )

        c = coxph(formula=Surv(T, E) ~ var1 , data=df, weights=w)
        cox.zph(c, transform='rank')
    """
    observations = {
        "var1": [0.209325, 0.693919, 0.443804, 0.065636, 0.386294],
        "T": [5.269797, 6.601666, 7.335846, 11.684092, 12.678458],
        "w": [1, 0.5, 2, 1, 1],
    }
    df = pd.DataFrame(observations)
    df["E"] = True  # every subject has an observed event

    cph = CoxPHFitter()
    cph.fit(df, "T", "E", weights_col="w")

    transforms = ["km", "rank", "log", "identity"]
    results = stats.proportional_hazard_test(cph, df, time_transform=transforms)
    results.print_summary(5)

    rank_statistic = results.summary.loc["var1", "rank"]["test_statistic"]
    npt.assert_allclose(rank_statistic, 0.108, rtol=1e-2)

Exemplo n.º 6

0

Exibir arquivo

    def mpss_ph_lifelines(self):
        """
        Rank features by the magnitude of their Cox proportional-hazards
        coefficient, using the lifelines package.

        ``self.x_train`` supplies the features and ``self.scores`` is used as
        the duration column; every row is marked as an observed event.

        :return: DataFrame with columns ``covariate``, ``coef``, ``p``,
            ``feature`` and ``coef_abs``, sorted by ``coef_abs`` descending
        """
        x_train = pd.DataFrame(self.x_train)

        # Remove any feature columns that are all 0 values, otherwise the
        # regression cannot run.  ``.copy()`` makes the assignments below
        # write to an independent frame instead of a slice of ``x_train``.
        lifelines_dataset = x_train.loc[:, (x_train != 0).any(axis=0)].copy()

        # Reformat for lifelines: duration column plus an all-ones event flag
        lifelines_dataset['scores'] = self.scores
        lifelines_dataset['event'] = 1

        # Run proportional hazards regression
        cph = CoxPHFitter(penalizer=5, alpha=1)
        cph.fit(lifelines_dataset, duration_col='scores', event_col='event')

        # Coefficients, absolute value of coefficients, and p-values
        # (copied so that adding columns does not touch a slice)
        importance = cph.summary.reset_index()[['covariate', 'coef', 'p']].copy()
        importance['feature'] = importance['covariate']
        importance['coef_abs'] = importance['coef'].abs()

        # Sort feature importance, largest effect first
        importance = importance.sort_values(
            'coef_abs', ascending=False).reset_index(drop=True)
        return importance

Exemplo n.º 7

0

Exibir arquivo

    def _compute_likelihood_ratio_test(self):
        """
        Likelihood ratio test of the fitted model against the covariate-free
        (trivial) Cox model.

        The trivial model is fitted with ``CoxPHFitter`` on the last
        ``event``/``stop`` row and the last weight of each index group.

        Returns the test statistic, the degrees of freedom (number of columns
        of ``self.hazards_``) and the natural log of the p-value.
        """
        last_rows = self.start_stop_and_events.groupby(level=0).last()
        last_weights = self.weights.groupby(level=0).last()[["__weights"]]
        trivial_dataset = last_rows[["event", "stop"]].join(last_weights)

        null_model = CoxPHFitter()
        null_model.fit(
            trivial_dataset,
            "stop",
            "event",
            weights_col="__weights",
            show_progress=False,
        )

        # 2 * (log-likelihood of this model - log-likelihood of the null model)
        test_stat = 2 * self._log_likelihood - 2 * null_model._log_likelihood
        degrees_freedom = self.hazards_.shape[1]
        chisq_result = chisq_test(
            test_stat, degrees_freedom=degrees_freedom, alpha=0.0)
        p_value = chisq_result[1]
        return test_stat, degrees_freedom, np.log(p_value)

Exemplo n.º 8

0

Exibir arquivo

def c_index_multiple_from_python(matrix,
                                 isdead,
                                 nbdays,
                                 matrix_test,
                                 isdead_test,
                                 nbdays_test,
                                 isfactor=False):
    """
    Fit a Cox PH model on the training data and score it on the test data.

    ``matrix``/``matrix_test`` hold the covariates, ``isdead*`` the event
    flags and ``nbdays*`` the durations.  ``isfactor`` is accepted but unused.

    Returns the concordance index on the test frame, or ``np.nan`` (after
    printing the exception) when fitting fails.
    """
    def _survival_frame(features, dead, days):
        # covariates first, then the event and duration columns
        table = pd.DataFrame(features)
        table["isdead"] = dead
        table["nbdays"] = days
        return table

    frame = _survival_frame(matrix, isdead, nbdays)
    frame_test = _survival_frame(matrix_test, isdead_test, nbdays_test)

    cph = CoxPHFitter()

    try:
        # fitting warnings are silenced
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            cph.fit(frame, "nbdays", "isdead")
    except Exception as e:
        print(e)
        return np.nan

    return cph.score(frame_test, scoring_method="concordance_index")

Exemplo n.º 9

0

Exibir arquivo

    def DoFeatureSelectionCPH(self, x, c, s, xnames, fold, sel_f_num,
                              dev_index):
        """Rank features by the p-value of a single-covariate Cox PH fit.

        Features whose standard deviation is not above 0.15 are dropped
        first.  Each remaining feature is fitted alone, with ``c`` as the
        event column (``E``) and ``s`` as the duration column (``S``).
        ``fold``, ``sel_f_num`` and ``dev_index`` are accepted but unused.

        Returns ``(sort_idx, f_name_sort, f_score_sort)``: the ascending
        p-value order over the retained features, with their names and
        p-values in that order.
        """
        variance_th = 0.15
        xdf = pd.DataFrame(x, columns=xnames)
        keep = xdf.std() > variance_th  # boolean mask, one entry per feature
        xdf = xdf.loc[:, keep]
        xnames = xnames[keep]
        x = xdf.values

        gene_p_value = []
        for col in tqdm(range(x.shape[1])):
            # single feature column followed by event and duration
            stacked = np.column_stack((x[:, col:col + 1], c, s))
            columns = xnames.copy().tolist()[col:col + 1] + ['E', 'S']
            train_df = pd.DataFrame(stacked, columns=columns)

            cph = CoxPHFitter()
            cph.fit(train_df,
                    duration_col='S',
                    event_col='E',
                    step_size=0.1,
                    show_progress=False)
            p_column = pd.DataFrame(cph.summary)['p'].values
            gene_p_value.append(p_column[0])

        gene_p_value = np.asarray(gene_p_value)
        sort_idx = np.argsort(gene_p_value)
        f_name_sort = np.asarray(xnames)[sort_idx]
        f_score_sort = gene_p_value[sort_idx]

        return sort_idx, f_name_sort, f_score_sort  #, auc

Exemplo n.º 10

0

Exibir arquivo

Arquivo: test_statistics.py Projeto: humanoftheforest/lifelines_VR

def test_proportional_hazard_test_with_weights_and_strata():
    """Compare the identity-transform PH test of a weighted, stratified fit with R.

    Reference R session::

        library(survival)
        df <- data.frame(
          "var1" = c(0.209325, 0.693919, 0.443804, 0.065636, 0.386294),
          "T" = c(5.269797, 6.601666, 7.335846, 11.684092, 12.678458),
          "E" = c(1, 1, 1, 1, 1),
          "w" = c(1, 0.5, 2, 1, 1),
          "s" = c(1, 1, 0, 0, 0)
        )

        c = coxph(formula=Surv(T, E) ~ var1 + strata(s), data=df, weights=w)
        cz = cox.zph(c, transform='identity')
    """
    observations = {
        "var1": [0.209325, 0.693919, 0.443804, 0.065636, 0.386294],
        "T": [5.269797, 6.601666, 7.335846, 11.684092, 12.678458],
        "w": [1, 0.5, 2, 1, 1],
        "s": [1, 1, 0, 0, 0],
    }
    df = pd.DataFrame(observations)
    df["E"] = True  # every subject has an observed event

    cph = CoxPHFitter()
    cph.fit(df, "T", "E", weights_col="w", strata="s", robust=True)

    results = stats.proportional_hazard_test(cph, df, time_transform="identity")

    statistic = results.summary.loc["var1"]["test_statistic"]
    npt.assert_allclose(statistic, 0.0283, rtol=1e-3)

Exemplo n.º 11

0

Exibir arquivo

Arquivo: test_plotting.py Projeto: tjmills/lifelines

 def test_coxph_plot_covariate_groups_with_multiple_variables(self, block):
     """Plot covariate groups for joint (age, prio) settings after a Rossi fit."""
     rossi = load_rossi()
     fitter = CoxPHFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest")

     covariates = ["age", "prio"]
     value_rows = [[10, 0], [50, 10], [80, 90]]  # one (age, prio) pair per curve
     fitter.plot_covariate_groups(covariates, value_rows)

     self.plt.title("test_coxph_plot_covariate_groups_with_multiple_variables")
     self.plt.show(block=block)

Exemplo n.º 12

0

Exibir arquivo

Arquivo: test_statistics.py Projeto: humanoftheforest/lifelines_VR

def test_proportional_hazard_test_with_kmf_with_some_censorship():
    """Compare the default PH test statistic with R when one subject is censored.

    Reference R session::

        library(survival)
        df <- data.frame(
          "var1" = c(0.209325, 0.693919, 0.443804, 0.065636, 0.386294),
          "T" = c(5.269797, 6.601666, 7.335846, 11.684092, 12.678458),
          "E" = c(1, 1, 1, 0, 1)
        )

        c = coxph(formula=Surv(T, E) ~ var1 , data=df)
        cox.zph(c, transform='km')
    """
    observations = {
        "var1": [0.209325, 0.693919, 0.443804, 0.065636, 0.386294],
        "T": [5.269797, 6.601666, 7.335846, 11.684092, 12.678458],
        "E": [1, 1, 1, 0, 1],  # fourth subject is censored
    }
    df = pd.DataFrame(observations)

    cph = CoxPHFitter()
    cph.fit(df, "T", "E")

    results = stats.proportional_hazard_test(cph, df)
    statistic = results.summary.loc["var1"]["test_statistic"]
    npt.assert_allclose(statistic, 1.013802, rtol=1e-3)

Exemplo n.º 13

0

Exibir arquivo

Arquivo: test_statistics.py Projeto: humanoftheforest/lifelines_VR

def test_proportional_hazard_test_with_kmf_with_some_censorship_and_weights():
    """Compare the default PH test statistic of a weighted fit with censoring with R.

    Reference R session::

        library(survival)
        df <- data.frame(
          "var1" = c(0.209325, 0.693919, 0.443804, 0.065636, 0.386294),
          "T" = c(5.269797, 6.601666, 7.335846, 11.684092, 12.678458),
          "E" = c(1, 1, 1, 0, 1),
          "w" = c(1, 0.5, 2, 1, 1),
        )

        c = coxph(formula=Surv(T, E) ~ var1 , data=df, weights=w)
        cox.zph(c, transform='km')
    """
    # NOTE(review): the third weight is 5 here but 2 in the R session above;
    # confirm which one the expected statistic was computed with.
    observations = {
        "var1": [0.209325, 0.693919, 0.443804, 0.065636, 0.386294],
        "T": [5.269797, 6.601666, 7.335846, 11.684092, 12.678458],
        "E": [1, 1, 1, 0, 1],
        "w": [1, 0.5, 5, 1, 1],
    }
    df = pd.DataFrame(observations)

    cph = CoxPHFitter()
    # the fit, the PH test and the assertion all run inside the warning check
    with pytest.warns(StatisticalWarning, match="weights are not integers"):
        cph.fit(df, "T", "E", weights_col="w")
        results = stats.proportional_hazard_test(cph, df)
        statistic = results.summary.loc["var1"]["test_statistic"]
        npt.assert_allclose(statistic, 0.916, rtol=1e-2)

Exemplo n.º 14

0

Exibir arquivo

def getHazardRatio(df_col, os, event, genename, value, binary=False, age=None, return_sign=False):
    """Fit a Cox PH model on one gene column and return its hazard ratio.

    The model uses ``df_col`` as covariate ``Gene``, ``os`` as duration and
    ``event`` as event flag; ``age`` is added as covariate ``Age`` when given.

    With ``binary=True`` a hazard ratio below 1 is inverted and ``value`` is
    set to 1.

    Returns:
        ``(genename, value, hazard_ratio, df_col.sum())`` when ``return_sign``
        is true, otherwise the hazard-ratio array.  If fitting raises
        ``ValueError`` the tuple form with ``np.nan`` is returned regardless
        of ``return_sign``.
    """
    cph = CoxPHFitter()
    model_columns = {'Gene': df_col, 'Duration': os, 'Flag': event}
    os_data = pd.DataFrame(model_columns)
    if age is not None:
        os_data['Age'] = age

    try:
        cph.fit(os_data, 'Duration', 'Flag', show_progress=False)
    except ValueError:
        print('Not working, returning nans')
        return genename, value, np.nan, df_col.sum()

    # exp(coefficient) of the Gene covariate
    hazard_ratio = np.exp(cph.hazards_['Gene'].values)

    if binary and hazard_ratio < 1:
        hazard_ratio = 1/hazard_ratio
        value = 1

    if not return_sign:
        return hazard_ratio
    return genename, value, hazard_ratio[0], df_col.sum()

Exemplo n.º 15

0

Exibir arquivo

Arquivo: porch.py Projeto: statisticalbiotechnology/metabric-pathway-survival

def survival(row, phenotype_df, duration_col = 'T', event_col = 'E', other_cols = None):
    """
    Fit a Cox PH model with ``row`` as covariate and return its summary row.

    row: named Series of covariate values; joined onto the transposed
        ``phenotype_df`` under ``row.name`` after conversion to float
    phenotype_df: phenotype table; it is transposed before the join
    duration_col: survival time
    event_col: whether an event (death or other) has occurred or not. 0 for no, 1 for yes
    other_cols: other variables to consider in the regression (any iterable
        of column names; ``None`` means none)

    Returns the row of ``cph.summary`` for the (sanitised) ``row.name``.
    The caller's ``row`` is left untouched: earlier versions renamed it in place.
    """
    def _patsy_safe(name):
        # spaces, dots and dashes conflict with patsy formula syntax
        return name.replace(' ','_').replace('.','_').replace('-','_')

    phenotype_df = phenotype_df.T
    phenotype_df = phenotype_df.join(row.astype(float))
    phenotype_df[duration_col] = phenotype_df[duration_col].astype(float)
    phenotype_df[event_col] = phenotype_df[event_col].astype(int)

    # Sanitise every name that ends up in the formula, and the frame's
    # columns with the same rule so the two stay in sync.
    duration_col = _patsy_safe(duration_col)
    event_col = _patsy_safe(event_col)
    other_cols = [_patsy_safe(x) for x in (other_cols or [])]
    row_name = _patsy_safe(row.name)
    phenotype_df.columns = [_patsy_safe(x) for x in phenotype_df.columns]

    formula = ' + '.join([row_name, duration_col, event_col] + other_cols)
    X = patsy.dmatrix(formula_like = formula, data = phenotype_df, return_type = 'dataframe')
    X = X.drop(['Intercept'], axis = 1)
    cph = CoxPHFitter()
    cph.fit(X, duration_col = duration_col, event_col = event_col)
    result = cph.summary.loc[row_name]
    return result

Exemplo n.º 16

0

Exibir arquivo

Arquivo: test_plotting.py Projeto: rserran/lifelines

 def test_coxph_plot_partial_effects_on_outcome_with_multiple_variables(self, block):
     """Plot partial effects for joint (age, prio) settings after a Rossi fit."""
     rossi = load_rossi()
     fitter = CoxPHFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest")

     covariates = ["age", "prio"]
     value_rows = [[10, 0], [50, 10], [80, 90]]  # one (age, prio) pair per curve
     fitter.plot_partial_effects_on_outcome(covariates, value_rows)

     self.plt.title("test_coxph_plot_partial_effects_on_outcome_with_multiple_variables")
     self.plt.show(block=block)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: test_plotting.py Projeto: rserran/lifelines

 def test_coxph_plot_partial_effects_on_outcome_with_cumulative_hazard(self, block):
     """Plot partial effects of age with ``y="cumulative_hazard"`` after a Rossi fit."""
     rossi = load_rossi()
     fitter = CoxPHFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest")

     ages = [10, 50, 80]
     fitter.plot_partial_effects_on_outcome("age", ages, y="cumulative_hazard")

     self.plt.title("test_coxph_plot_partial_effects_on_outcome")
     self.plt.show(block=block)

Exemplo n.º 18

0

Exibir arquivo

Arquivo: test_plotting.py Projeto: rserran/lifelines

 def test_coxph_plotting_with_subset_of_columns(self, block):
     """Plot only var1 and var2 of a model fitted on the regression dataset."""
     dataset = load_regression_dataset()
     fitter = CoxPHFitter()
     fitter.fit(dataset, duration_col="T", event_col="E")

     shown_columns = ["var1", "var2"]
     fitter.plot(columns=shown_columns)

     self.plt.title("test_coxph_plotting_with_subset_of_columns")
     self.plt.show(block=block)

Exemplo n.º 19

0

Exibir arquivo

 def test_coxph_plot_covariate_groups_with_single_strata(self, block):
     """Plot covariate groups of age for a Rossi model stratified on ``paro``."""
     rossi = load_rossi()
     fitter = CoxPHFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest", strata="paro")

     ages = [10, 50, 80]
     fitter.plot_covariate_groups("age", ages)

     self.plt.title("test_coxph_plot_covariate_groups_with_strata")
     self.plt.show(block=block)

Exemplo n.º 20

0

Exibir arquivo

Arquivo: test_plotting.py Projeto: rserran/lifelines

 def test_coxph_plotting(self, block):
     """Plot a model fitted on the regression dataset with default options."""
     dataset = load_regression_dataset()
     fitter = CoxPHFitter()
     fitter.fit(dataset, duration_col="T", event_col="E")

     fitter.plot()

     self.plt.title("test_coxph_plotting")
     self.plt.show(block=block)

Exemplo n.º 21

0

Exibir arquivo

Arquivo: test_plotting.py Projeto: rserran/lifelines

 def test_coxph_plotting_with_hazards_ratios(self, block):
     """Plot a regression-dataset model with ``hazard_ratios=True``."""
     dataset = load_regression_dataset()
     fitter = CoxPHFitter()
     fitter.fit(dataset, duration_col="T", event_col="E")

     fitter.plot(hazard_ratios=True)

     self.plt.title("test_coxph_plotting")
     self.plt.show(block=block)

Exemplo n.º 22

0

Exibir arquivo

 def test_coxph_plot_covariate_groups(self, block):
     """Plot covariate groups of age after fitting the Rossi dataset."""
     rossi = load_rossi()
     fitter = CoxPHFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest")

     ages = [10, 50, 80]
     fitter.plot_covariate_groups("age", ages)

     self.plt.title("test_coxph_plot_covariate_groups")
     self.plt.show(block=block)

Exemplo n.º 23

0

Exibir arquivo

def main():
    """Load the preprocessed arrays, add code-indicator columns and fit a Cox PH model.

    Paths are built from the ``Hyperparameters`` object.  The first 10 rows
    with ``TYPE == 1`` and the first 10 with ``TYPE == 0`` of the add-codes
    table each contribute one column, named after ``DESCRIPTION``, that is
    the row-wise maximum of ``data['codes'] == INDEX_CODE``.
    """
    # Load data
    print('Load data...')
    hp = Hyperparameters()
    arrays_path = '../' + hp.data_pp_dir + 'data_arrays_' + hp.gender + '.npz'
    data = np.load(arrays_path)

    print('Use all data for model fitting...')
    x = data['x']
    time = data['time']
    event = data['event']

    cols_path = '../' + hp.data_pp_dir + 'cols_list.pkl'
    cols_list = load_obj(cols_path)

    df = pd.DataFrame(x, columns=cols_list)
    df['TIME'] = time
    df['EVENT'] = event

    ###################################################################

    print('Add additional columns...')
    codes_path = '../' + hp.results_dir + 'hr_addcodes_' + hp.gender + '.feather'
    df_index_code = feather.read_dataframe(codes_path)
    first_type_one = df_index_code[df_index_code['TYPE']==1].head(10)
    first_type_zero = df_index_code[df_index_code['TYPE']==0].head(10)
    df_index_code = pd.concat([first_type_one, first_type_zero], sort=False)

    for _, code_row in df_index_code.iterrows():
        description = code_row['DESCRIPTION']
        print(description)
        # True for a row when any of its codes equals this index code
        has_code = (data['codes'] == code_row['INDEX_CODE']).max(axis=1)
        df[description] = has_code
        cols_list = cols_list + [description]

    ###################################################################

    print('Fitting...')
    cph = CoxPHFitter()
    cph.fit(df, duration_col='TIME', event_col='EVENT', show_progress=True, step_size=0.5)
    cph.print_summary()
    print('done')

Exemplo n.º 24

0

Exibir arquivo

    def _fit_cox(self):
        """Fit the two-group Cox model once and cache it in ``self._cf``.

        Rows of ``self.survival0.df`` get group indicator 0 and rows of
        ``self.survival1.df`` get 1; the indicator column is named
        ``self.survival1.label``.  The second frame's time/event columns are
        renamed to the first frame's names before stacking.  Does nothing if
        ``self._cf`` is already set.
        """
        if self._cf is not None:
            return

        cox_df1 = pd.DataFrame(self.survival0.df,
                               columns=[self.time_col1, self.event_col1])
        cox_df1[self.survival1.label] = 0

        cox_df2 = pd.DataFrame(self.survival1.df,
                               columns=[self.time_col2, self.event_col2])
        # Align column names with the first frame (a no-op when they match).
        cox_df2 = cox_df2.rename(columns={
            self.time_col2: self.time_col1,
            self.event_col2: self.event_col1,
        })
        cox_df2[self.survival1.label] = 1

        # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
        cox_df = pd.concat([cox_df1, cox_df2], ignore_index=True)

        cox_fitted = CoxPHFitter(normalize=False)
        cox_fitted.fit(cox_df,
                       self.time_col1,
                       event_col=self.event_col1,
                       include_likelihood=False)

        self._cf = cox_fitted

Exemplo n.º 25

0

Exibir arquivo

Arquivo: test_plotting.py Projeto: rserran/lifelines

 def test_coxph_plot_partial_effects_on_outcome_with_single_strata(self, block):
     """Plot partial effects of age for a Rossi model stratified on ``paro``."""
     rossi = load_rossi()
     fitter = CoxPHFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest", strata="paro")

     ages = [10, 50, 80]
     fitter.plot_partial_effects_on_outcome("age", ages)

     self.plt.title("test_coxph_plot_partial_effects_on_outcome_with_strata")
     self.plt.show(block=block)

Exemplo n.º 26

0

Exibir arquivo

def coxreg_single_run(xtr, ytr, penalty):
    """Fit a penalised Cox PH model and return the fitted ``CoxPHFitter``.

    ``ytr`` and ``xtr`` are concatenated column-wise; the first two columns
    are named ``status`` and ``time`` and the remaining ones ``X1``, ``X2``,
    ... in order.  ``penalty`` is passed as the fitter's ``penalizer``.
    """
    df_tr = pd.DataFrame(np.concatenate((ytr, xtr), axis=1))
    feature_names = ['X' + str(j) for j in range(1, xtr.shape[1] + 1)]
    df_tr.columns = ['status', 'time'] + feature_names

    cph = CoxPHFitter(penalizer=penalty)
    cph.fit(df_tr, duration_col='time', event_col='status')
    return cph

Exemplo n.º 27

0

Exibir arquivo

Arquivo: test_plotting.py Projeto: rserran/lifelines

 def test_spline_coxph_plot_partial_effects_on_outcome_with_strata(self, block):
     """Plot partial effects of age for a spline-baseline model stratified on ``wexp``."""
     rossi = load_rossi()
     fitter = CoxPHFitter(baseline_estimation_method="spline", n_baseline_knots=2)
     fitter.fit(rossi, duration_col="week", event_col="arrest", strata=["wexp"])

     ages = [10, 50, 80]
     fitter.plot_partial_effects_on_outcome("age", ages)

     self.plt.title("test_spline_coxph_plot_partial_effects_on_outcome_with_strata")
     self.plt.show(block=block)

Exemplo n.º 28

0

Exibir arquivo

def fit_cox(subset,
            name,
            duration_col='days_survival',
            event_col='vital_status',
            *args,
            **kwargs):
    """
    Fit a lifelines CoxPHFitter and return its summary with corrected p-values.

    subset: DataFrame
    name: name of the analysis (not used by this function)
    duration_col: column of subset with number of days sample survived
    event_col: column of subset with 0/1 whether the sample is alive or dead
    *args: passed to the CoxPHFitter constructor
    **kwargs: passed to the CoxPHFitter constructor

    Returns ``(summary, cph)``, where ``summary`` is ``cph.summary`` extended
    with ``corrected_p`` (Bonferroni) and ``-log2(corrected_p)``.  If fitting
    or the correction raises, the exception info is printed and
    ``(None, None)`` is returned.
    """
    from lifelines import CoxPHFitter
    from statsmodels.stats.multitest import multipletests
    cph = CoxPHFitter(*args, **kwargs)
    try:
        cph.fit(subset, duration_col=duration_col, event_col=event_col)
        summary = cph.summary
        p_vals = multipletests(summary["p"], method="bonferroni")[1]
        summary["corrected_p"] = p_vals
        summary["-log2(corrected_p)"] = -np.log2(p_vals)
        return summary, cph
    except Exception:
        # Best effort: report and carry on.  ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        print(*sys.exc_info())
        return None, None

Exemplo n.º 29

0

Exibir arquivo

Arquivo: test_plotting.py Projeto: rserran/lifelines

 def test_coxph_plot_partial_effects_on_outcome_with_nonnumeric_strata(self, block):
     """Plot partial effects of age for a model stratified on a random "A"/"B" column."""
     rossi = load_rossi()
     # string-valued strata, drawn at random for every row
     rossi["strata"] = np.random.choice(["A", "B"], size=rossi.shape[0])
     fitter = CoxPHFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest", strata="strata")

     ages = [10, 50, 80]
     fitter.plot_partial_effects_on_outcome("age", ages)

     self.plt.title("test_coxph_plot_partial_effects_on_outcome_with_single_strata")
     self.plt.show(block=block)

Exemplo n.º 30

0

Exibir arquivo

Arquivo: test_plotting.py Projeto: rserran/lifelines

 def test_coxph_plot_partial_effects_on_outcome_with_multiple_variables_and_strata(self, block):
     """Plot partial effects for (age, prio) settings on a model with random "A"/"B" strata."""
     rossi = load_rossi()
     # string-valued strata, drawn at random for every row
     rossi["strata"] = np.random.choice(["A", "B"], size=rossi.shape[0])
     fitter = CoxPHFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest", strata="strata")

     covariates = ["age", "prio"]
     value_rows = [[10, 0], [50, 10], [80, 90]]  # one (age, prio) pair per curve
     fitter.plot_partial_effects_on_outcome(covariates, value_rows)

     self.plt.title("test_coxph_plot_partial_effects_on_outcome_with_multiple_variables_and_strata")
     self.plt.show(block=block)

Exemplo n.º 31

0

Exibir arquivo

Arquivo: estimators_wrappers.py Projeto: sashaostr/sklearn-lifelines

    def fit(self, X, y, **fit_params):
        """Fit a ``CoxPHFitter`` on ``X`` joined with the target columns of ``y``.

        A copy of ``X`` receives ``y[self.duration_column]`` and, when
        ``self.event_col`` is set, ``y[self.event_col]``.  The fitter is built
        from ``self.get_params()``; extra ``fit_params`` are forwarded to its
        ``fit``.  The fitted model is stored in ``self.estimator``.

        Returns ``self``.
        """
        training_frame = X.copy()
        training_frame[self.duration_column] = y[self.duration_column]
        if self.event_col is not None:
            training_frame[self.event_col] = y[self.event_col]

        constructor_params = self.get_params()
        fitter = CoxPHFitter(**constructor_params)

        fitter.fit(
            training_frame,
            duration_col=self.duration_column,
            event_col=self.event_col,
            initial_beta=self.initial_beta,
            include_likelihood=self.include_likelihood,
            strata=self.strata,
            **fit_params
        )
        self.estimator = fitter
        return self

Exemplo n.º 32

0

Exibir arquivo

Arquivo: multiCoeff.py Projeto: cooperlab/ActiveLearning

def cox_regression(clean_df):
    """Fit a Cox PH model on ``clean_df`` and return rounded summary statistics.

    The model uses ``time`` as duration and ``event`` as event column.  For
    every name in the module-level ``stats_of_interest`` the matching summary
    column is rounded: ``p`` with ``round_dic_eng``, all others with
    ``round_dic``.

    Returns a dict keyed by statistic name.
    """
    cf = CoxPHFitter()
    cf.fit(clean_df, 'time', event_col='event')
    summary_by_stat = cf.summary.to_dict()

    res_dic = {}
    for stat_of_interest in stats_of_interest:
        rounder = round_dic_eng if stat_of_interest == 'p' else round_dic
        res_dic[stat_of_interest] = rounder(summary_by_stat[stat_of_interest])
    return res_dic

Exemplo n.º 33

0

Exibir arquivo

Arquivo: imm_cor.py Projeto: agartland/utils

def estCoxPHTE(df, treatment_col='treated', duration_col='dx', event_col='disease', covars=None):
    """Estimates treatment efficacy using proportional hazards (Cox model).

    Parameters
    ----------
    df : pandas.DataFrame

    treatment_col : string
        Column in df indicating treatment.
    duration_col : string
        Column in df indicating survival times.
    event_col : string
        Column in df indicating events (censored data are 0)
    covars : iterable of strings, optional
        Other columns to include in the Cox model as covariates.
        ``None`` (the default) means no extra covariates.

    Returns
    -------
    pandas.Series
        Indexed by ``['TE', 'UB', 'LB', 'pvalue', 'logrank_pvalue', 'model']``:
        the efficacy estimate ``1 - exp(coef)``, the two transformed
        confidence bounds, the Cox p-value, the log-rank p-value comparing
        treatment == 0 with treatment == 1, and the fitted CoxPHFitter."""
    # None instead of a shared mutable ``[]`` default; list() lets callers
    # pass any iterable of column names.
    covars = [] if covars is None else list(covars)
    model_cols = [treatment_col, duration_col, event_col] + covars

    coxphf = CoxPHFitter()
    coxphf.fit(df[model_cols], duration_col=duration_col, event_col=event_col)

    te = 1 - np.exp(coxphf.hazards_.loc['coef', treatment_col])
    # NOTE(review): 'UB' is 1 - exp(upper coefficient bound), which is the
    # smaller of the two efficacy bounds - confirm the labels are intended.
    ci = 1 - np.exp(coxphf.confidence_intervals_[treatment_col].loc[['upper-bound', 'lower-bound']])
    # First p-value; treatment_col is the first column handed to the fitter
    # (assumes the fitter keeps column order - TODO confirm).
    pvalue = coxphf._compute_p_values()[0]

    ind1 = df[treatment_col] == 0
    ind2 = df[treatment_col] == 1
    results = logrank_test(
        df[duration_col].loc[ind1],
        df[duration_col].loc[ind2],
        event_observed_A=df[event_col].loc[ind1],
        event_observed_B=df[event_col].loc[ind2],
    )
    index = ['TE', 'UB', 'LB', 'pvalue', 'logrank_pvalue', 'model']
    return pd.Series([te, ci['upper-bound'], ci['lower-bound'], pvalue, results.p_value, coxphf], index=index)

Exemplo n.º 34

0

Exibir arquivo

Arquivo: survival_analysis_python.py Projeto: jokerbea/BIN3005-PANDAS-AND-SKLEARN-SCRIPT-FOR-HANDLING-TCGA

# Kaplan-Meier curves, log-rank test and an age-only Cox model, comparing
# patients with and without neoadjuvant treatment.
# ``.loc`` replaces the ``.ix`` indexer (removed from pandas); the print calls
# are written so they give the same output on Python 2 and Python 3.
tx = df['history_of_neoadjuvant_treatment']=='Yes'  # boolean mask: treated patients
ax = plt.subplot(111)

# Survival curve of the treated group
kmf1 = KaplanMeierFitter(alpha=0.95)
kmf1.fit(durations=df.loc[tx, survival_col], event_observed=df.loc[tx, censor_col], label=['Tx==Yes'])
kmf1.plot(ax=ax, show_censors=True,  ci_show=False)


# Survival curve of the untreated group, drawn on the same axes
kmf2 = KaplanMeierFitter(alpha=0.95)
kmf2.fit(durations=df.loc[~tx, survival_col], event_observed=df.loc[~tx, censor_col], label=['Tx==No'])
kmf2.plot(ax=ax, show_censors=True,  ci_show=False )

add_at_risk_counts(kmf1, kmf2, ax=ax)
plt.title ('Acute myeloid leukemia survival analysis with Tx and without Tx')
plt.xlabel(survival_col)
plt.savefig('km.png')

# Log-rank test between the two groups
results = logrank_test(df.loc[tx, survival_col], df.loc[~tx, survival_col], df.loc[tx, censor_col], df.loc[~tx, censor_col], alpha=.99 )
results.print_summary()

# Cox model with age at diagnosis as the only covariate; rows with a
# missing age are dropped first.
cox = CoxPHFitter(normalize=False)
df_age = df[[survival_col, censor_col, 'age_at_initial_pathologic_diagnosis']]
df_age = df_age[pd.notnull(df_age['age_at_initial_pathologic_diagnosis'])]
cox = cox.fit(df_age, survival_col, event_col=censor_col, include_likelihood=True)
cox.print_summary()

# 10-fold cross-validation of the same model
scores = k_fold_cross_validation(cox, df_age, survival_col, event_col=censor_col, k=10)
print(scores)
print('Mean score %s' % np.mean(scores))
print('Std %s' % np.std(scores))

Exemplo n.º 35

0

Exibir arquivo

Arquivo: Cox_PH.py Projeto: pierceaw/intern_work

# Assemble the model frame from four sequences defined earlier in the script:
# duration, event indicator (not_censor) and the two covariates.
data = pd.DataFrame({'duration': duration, 'event': not_censor, 'age': age, 'college': college})

# Plot observations with censoring (disabled)
# plot_lifetimes(duration, event_observed = not_censor)

# Kaplan-Meier fit of the simulated data, then plot its survival function
from lifelines import KaplanMeierFitter
kmf =  KaplanMeierFitter()
kmf.fit(duration, event_observed = not_censor)
kmf.survival_function_.plot()

# Cox PH regression of duration on the remaining columns (age, college)
from lifelines import CoxPHFitter
cf = CoxPHFitter()
cf.fit(data, 'duration', event_col = 'event')
cf.print_summary()

## Get Predictions from Model ##

# 24 year old college grad (disabled)
#college_24 = pd.DataFrame({'age':[24], 'college':[1]})
#cf.predict_survival_function(college_24).plot()

# 65 year old high school grad (disabled)
#hs_65 = pd.DataFrame({'age':[65], 'college':[0]})
#cf.predict_survival_function(hs_65).plot()

# Three covariate profiles in one frame: a 24-year-old college grad, a
# 65-year-old high-school grad and an "Average" row (age 42, college 0.4).
mixed = pd.DataFrame({'age':[24, 65,42], 'college':[1,0,.4], 'index': ['24yr old College Grad','65yr old HS Grad','Average']})
mixed = mixed.set_index(['index']) # use the descriptive labels as row names

Exemplo n.º 36

0

Exibir arquivo

Arquivo: HegemonUtil.py Projeto: sahoo00/Hegemon

def multivariate(df):
    """Fit a Cox PH model on ``df`` (columns ``time`` and ``status``) and print its summary."""
    from lifelines import CoxPHFitter

    model = CoxPHFitter()
    model.fit(df, duration_col='time', event_col='status', show_progress=True)
    # the same results are available programmatically via model.summary
    model.print_summary()

Exemplo n.º 37

0

Exibir arquivo

Arquivo: SurvivalAnalysis.py Projeto: fclesio/learning-space

from lifelines.datasets import load_regression_dataset
regression_dataset = load_regression_dataset()

regression_dataset.head()

from lifelines import AalenAdditiveFitter, CoxPHFitter

# Using Cox Proportional Hazards model
cf = CoxPHFitter()
cf.fit(regression_dataset, 'T', event_col='E')
cf.print_summary()

# Using Aalen's Additive model
aaf = AalenAdditiveFitter(fit_intercept=False)
aaf.fit(regression_dataset, 'T', event_col='E')

# Covariates only: every column except the event flag and the duration.
# ``Index.difference`` replaces ``columns - [...]``, which is no longer a set
# difference in pandas; ``.loc`` replaces the removed ``.ix`` indexer.
x = regression_dataset[regression_dataset.columns.difference(['E', 'T'])]
# Survival functions of the rows labelled 10 through 12 (label slices include
# both ends).
aaf.predict_survival_function(x.loc[10:12]).plot()

Exemplo n.º 38

0

Exibir arquivo

Arquivo: survival_analysis.py Projeto: liruikaiyao/workshop

from lifelines.datasets import generate_regression_dataset
regression_dataset = generate_regression_dataset()
from lifelines import AalenAdditiveFitter, CoxPHFitter

# Cox proportional hazards model
cf = CoxPHFitter()
cf.fit(regression_dataset, duration_col='T', event_col='E')

# Aalen's additive model
aaf = AalenAdditiveFitter(fit_intercept=False)
aaf.fit(regression_dataset, duration_col='T', event_col='E')

# Covariates only: every column except the event flag and the duration.
# ``Index.difference`` replaces ``columns - [...]``, which is no longer a set
# difference in pandas; ``.loc`` replaces the removed ``.ix`` indexer.
x = regression_dataset[regression_dataset.columns.difference(['E', 'T'])]
# Survival functions of the rows labelled 10 through 12 (both ends included)
aaf.predict_survival_function(x.loc[10:12]).plot()
aaf.plot()

Exemplo n.º 39

0

Exibir arquivo

Arquivo: cox.py Projeto: vdn207/Cancer-Data

"""

# print cancer['T'].unique()
# print cancer['E'].unique()
# cancer = cancer.dropna()


# the '-1' term
# refers to not adding an intercept column (a column of all 1s).
# It can be added to the Fitter class.

covMatrix = cancer.cov()

cf = CoxPHFitter()
cf.fit(covMatrix, "T", event_col="E")  # extra paramater for categorical , strata=catVar
cf.print_summary()

curve = cf.predict_survival_function(cancer)
curve.plot()
plt.show()
print "hazard coeff", cf.hazards_
print "baseline ", cf.baseline_hazard_

"""
scores = k_fold_cross_validation(cf, covMatrix, 'T', event_col='E', k=3)
print scores
print np.mean(scores)
print np.std(scores)

"""

Exemplo n.º 40

0

Exibir arquivo

Arquivo: cph_run.py Projeto: jaredleekatzman/DeepSurv

# Script entry point: fit a Cox proportional hazards model on the 'train'
# split of the dataset named on the command line, then report metrics on the
# 'valid' and 'test' splits when they are present.
if __name__ == '__main__':
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    args = parse_args()
    print("Arguments:",args)

    # Load Dataset
    # `datasets` is indexed by split name below ('train', 'valid', 'test').
    print("Loading datasets: " + args.dataset)
    datasets = utils.load_datasets(args.dataset)

    # Train CPH model
    print("Training CPH Model")
    train_df = utils.format_dataset_to_df(datasets['train'], DURATION_COL, EVENT_COL)
    cf = CoxPHFitter()
    # NOTE(review): `include_likelihood` may not be accepted by every lifelines
    # release -- confirm against the pinned version. `results` is not used in
    # the visible part of this script.
    results = cf.fit(train_df, duration_col=DURATION_COL, event_col=EVENT_COL, 
        include_likelihood=True)
    cf.print_summary()
    # Reads a private (underscore-prefixed) attribute of the fitter.
    print("Train Likelihood: " + str(cf._log_likelihood))

    # The validation and test splits are optional.
    if 'valid' in datasets:
        metrics = evaluate_model(cf, datasets['valid'])
        print("Valid metrics: " + str(metrics))

    if 'test' in datasets:
        # Only the test evaluation requests bootstrapping.
        metrics = evaluate_model(cf, datasets['test'], bootstrap=True)
        print("Test metrics: " + str(metrics))

    print("Saving Visualizations")
    # Runs only when a test split exists and a treatment index was supplied.
    if 'test' in datasets and args.treatment_idx is not None:
        print("Calculating treatment recommendation survival curvs")
        # We use the test dataset because these experiments don't have a viz dataset

Exemplo n.º 41

0

Exibir arquivo

Arquivo: survival.py Projeto: hammerlab/cohorts

def _plot_kmf_single(df,
                     condition_col,
                     survival_col,
                     censor_col,
                     threshold,
                     title,
                     xlabel,
                     ylabel,
                     ax,
                     with_condition_color,
                     no_condition_color,
                     with_condition_label,
                     no_condition_label,
                     color_map,
                     label_map,
                     color_palette,
                     ci_show,
                     print_as_title):
    """
    Helper function to produce a single KM survival plot, among observations in df by groups defined by condition_col.

    All inputs are required - this function is intended to be called by `plot_kmf`.

    Groups are formed in one of three ways:

    - `threshold` is not None: rows where `condition_col` > threshold versus
      the rest. The string "median" means "use the column median".
    - `condition_col` has object or category dtype: one group per value.
    - `condition_col` has bool dtype: True versus False.

    Parameters
    ----------
    df : DataFrame holding `condition_col`, `survival_col` and `censor_col`.
    condition_col : name of the column that defines the groups.
    survival_col : name of the column of durations.
    censor_col : name of the column that is cast to bool and passed to the
        Kaplan-Meier fitter as the event-observed indicator.
    threshold : None, a numeric cut-off, or the string "median".
    title, xlabel, ylabel : plot texts; each is applied only when truthy.
    ax : axes to draw on; when falsy the first `kmf.plot` call creates one.
    with_condition_color, no_condition_color : colors for the True / False
        groups of a two-way split (normalized to hex when color-like).
    with_condition_label, no_condition_label : labels for the True / False
        groups; defaults are generated when falsy.
    color_map, label_map : mappings from group value to color / label;
        defaults are generated when falsy.
    color_palette : seaborn palette used to color categorical groups when
        `color_map` is not given.
    ci_show : forwarded to `kmf.plot`.
    print_as_title : when no `title` is given, put the per-group counts in
        the plot title instead of printing them.

    Returns
    -------
    The `logrank_test` result when exactly two groups were plotted, a
    `NullSurvivalResults` when fewer than two were plotted, and the fitted
    `CoxPHFitter` otherwise. In every case the object is annotated with
    `survival_data_series`, `event_data_series` and `desc`.

    Raises
    ------
    ValueError
        If `threshold` is None and the dtype of `condition_col` is not
        object, category or bool.
    """
    # make color inputs consistent hex format
    if colors.is_color_like(with_condition_color):
        with_condition_color = colors.to_hex(with_condition_color)
    if colors.is_color_like(no_condition_color):
        no_condition_color = colors.to_hex(no_condition_color)

    ## prepare data to be plotted; producing 3 outputs:
    # - `condition`, series containing category labels to be plotted
    # - `label_map` (mapping condition values to plot labels)
    # - `color_map` (mapping condition values to plotted colors)
    col_dtype = df[condition_col].dtype
    # Default labels for a two-way (True/False) split; stay None for the
    # categorical case, which builds its maps per category instead.
    default_label_with_condition = None
    default_label_no_condition = None
    if threshold is not None:
        is_median = threshold == "median"
        if is_median:
            threshold = df[condition_col].median()
        label_suffix = float_str(threshold)
        condition = df[condition_col] > threshold
        default_label_no_condition = "%s ≤ %s" % (condition_col, label_suffix)
        if is_median:
            # only the "> threshold" label carries the "(median)" marker
            label_suffix += " (median)"
        default_label_with_condition = "%s > %s" % (condition_col, label_suffix)
    elif col_dtype == 'O' or col_dtype.name == "category":
        condition = df[condition_col].astype("category")
        if not label_map:
            label_map = {
                condition_value: '{} = {}'.format(condition_col, condition_value)
                for condition_value in condition.unique()
            }
        if not color_map:
            rgb_values = sb.color_palette(color_palette, len(label_map))
            hex_values = [colors.to_hex(col) for col in rgb_values]
            color_map = dict(zip(label_map, hex_values))
    elif col_dtype == 'bool':
        condition = df[condition_col]
        default_label_with_condition = "= {}".format(condition_col)
        default_label_no_condition = "¬ {}".format(condition_col)
    else:
        # Single-line literal: the former backslash continuation embedded the
        # next line's indentation in the middle of the message.
        raise ValueError(
            "Don't know how to plot data of type {}".format(col_dtype))

    if default_label_with_condition is not None:
        # two-way split: explicit labels/maps win over the generated defaults
        with_condition_label = with_condition_label or default_label_with_condition
        no_condition_label = no_condition_label or default_label_no_condition
        if not label_map:
            label_map = {False: no_condition_label,
                         True: with_condition_label}
        if not color_map:
            color_map = {False: no_condition_color,
                         True: with_condition_color}

    # produce kmf plot for each category (group) identified above
    kmf = KaplanMeierFitter()
    grp_desc = []
    grp_survival_data = {}
    grp_event_data = {}
    for grp_name, grp_df in df.groupby(condition):
        grp_survival = grp_df[survival_col]
        grp_event = grp_df[censor_col].astype(bool)
        grp_label = label_map[grp_name]
        grp_color = color_map[grp_name]
        kmf.fit(grp_survival, grp_event, label=grp_label)
        grp_desc.append("# {}: {}".format(grp_label, len(grp_survival)))
        grp_survival_data[grp_name] = grp_survival
        grp_event_data[grp_name] = grp_event
        # the first call creates the axes when none was supplied; later
        # groups are drawn onto the same axes
        if ax:
            ax = kmf.plot(ax=ax, show_censors=True, ci_show=ci_show, color=grp_color)
        else:
            ax = kmf.plot(show_censors=True, ci_show=ci_show, color=grp_color)

    ## format the plot
    # Set the y-axis to range 0 to 1
    ax.set_ylim(0, 1)
    y_tick_vals = ax.get_yticks()
    # show survival fractions as whole-number percentages
    ax.set_yticklabels(["%d" % int(y_tick_val * 100) for y_tick_val in y_tick_vals])
    # plot title
    if title:
        ax.set_title(title)
    elif print_as_title:
        ax.set_title(' | '.join(grp_desc))
    else:
        for desc in grp_desc:
            print(desc)
    # axis labels
    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)

    ## summarize analytical version of results
    ## again using same groups as are plotted
    # Use the groups that were actually plotted: `condition.unique()` can
    # contain NaN, which groupby drops, so indexing the per-group dicts with
    # it could raise KeyError or select the wrong branch below.
    grp_names = list(grp_survival_data)
    if len(grp_names) == 2:
        # use log-rank test for 2 groups
        results = logrank_test(grp_survival_data[grp_names[0]],
                               grp_survival_data[grp_names[1]],
                               event_observed_A=grp_event_data[grp_names[0]],
                               event_observed_B=grp_event_data[grp_names[1]])
    elif len(grp_names) < 2:
        # no analytical result for 1 or 0 groups
        results = NullSurvivalResults()
    else:
        # cox PH fitter for >2 groups
        cf = CoxPHFitter()
        cox_df = patsy.dmatrix('+'.join([condition_col, survival_col,
                                         censor_col]),
                               df, return_type='dataframe')
        # patsy adds an intercept column, which must not enter the model
        del cox_df['Intercept']
        results = cf.fit(cox_df, survival_col, event_col=censor_col)
        results.print_summary()
    # add metadata to results object so caller can print them
    results.survival_data_series = grp_survival_data
    results.event_data_series = grp_event_data
    results.desc = grp_desc
    return results