Example no. 1
0
from lifelines import CoxPHFitter
import numpy as np
import pandas as pd
import lifelines

# Fit a Cox proportional-hazards model to spreadsheet data, stratifying on
# every column after the first five, then report and predict hazards.
path = './totalData.xlsx'

data = pd.read_excel(path)
# First five columns are skipped (presumably identifiers/metadata -- TODO
# confirm against the spreadsheet layout); the remainder become strata.
strata_cols = list(data.columns)[5:]

cph = CoxPHFitter()
cph.fit(data, 'totaltime', 'failure', strata=strata_cols)

cph.print_summary()
cph.predict_cumulative_hazard(data)
Example no. 2
0
# Align phenotype rows (ib) and genotype columns (ia) to the matched samples.
pheno = pheno.iloc[ib]
pheno = pheno.reset_index(drop=True)
geno_ia = geno[:, ia]

# Fit the null Cox model: covariates only, no genotype term.
logger.info('Generating Null models\n')
cph = CoxPHFitter()
cph.fit(pheno[[T_name, event_name] + covname], T_name, event_col=event_name)
# Martingale residuals of the null model serve as the per-subject score.
res_surv = cph.compute_residuals(pheno[[T_name, event_name] + covname], 'martingale').sort_index()['martingale']

# This is the most memory intensive part. Might need to change if we are dealing with biobank scale data

if args.apr_flag == 'N':
  logger.info('Calculating Null covariance matrix\n')
  # Cumulative-hazard curves: rows indexed by time, one column per subject.
  mat = cph.predict_cumulative_hazard(pheno)
  # Hazard increments between consecutive time points (original said
  # axis=-0, which is just axis 0 -- the time axis).
  P = np.diff(mat, axis=0)
  for isubj in range(P.shape[1]):
    # Zero out increments at and after the subject's own observed time.
    idx = np.abs(mat.index - pheno[T_name][isubj]).argmin()
    P[idx:, isubj] = 0
  V = da.diag(np.array(pheno[event_name] - res_surv)) - da.dot(P.transpose(), P)
  X = np.array(pheno[covname])
  # Project the covariate space out of V: C = V - V X (X' V X)^-1 X' V.
  # NOTE(review): verify da.linalg.inv exists in the dask version in use.
  C = V - da.matmul(da.matmul(da.matmul(V, X), da.linalg.inv(da.matmul(da.matmul(X.transpose(), V), X))), da.matmul(X.transpose(), V))
else:
  logger.info('Using first order approximations for testing statistics\n')

# auto chunk to reduce the query time
# FIX: xrange is Python 2 only (NameError on Python 3); range is equivalent
# here and lazy on Python 3.
chunk_array = [bim.i.values[i:i + chunk_size] for i in range(0, len(bim.i.values), chunk_size)]
nchunk = len(chunk_array)
chunk_ind = 1
Example no. 3
0
        # Evaluation grid: unique observed times in the test set, with 0
        # prepended so the predicted curves start at time zero.
        sorted_y_test = np.sort(np.unique(y_test[:, 0]))
        if sorted_y_test[0] != 0:
            mesh_points = np.concatenate(([0.], sorted_y_test))
        else:
            mesh_points = sorted_y_test
        # Predicted survival curves on the grid; transposed so rows index
        # subjects and columns index time points.
        surv = \
            surv_model.predict_survival_function(X_test_standardized,
                                                 mesh_points)
        surv = surv.values.T

        # ---------------------------------------------------------------------
        # compute c-index
        #
        # NOTE(review): y_test appears to be (n, 2) with column 0 = time and
        # column 1 = event indicator -- confirm against the caller.
        if cindex_method == 'cum_haz':
            # Risk score = total predicted cumulative hazard over all test
            # event times; negated because higher hazard means shorter
            # survival and concordance_index expects higher = longer.
            cum_haz = \
                surv_model.predict_cumulative_hazard(X_test_standardized,
                                                     sorted_y_test)
            cum_haz = cum_haz.values.T
            cum_hazard_scores = cum_haz.sum(axis=1)
            test_cindex = concordance_index(y_test[:, 0],
                                            -cum_hazard_scores,
                                            y_test[:, 1])
        elif cindex_method == 'cum_haz_from_surv':
            # Derive cumulative hazard from survival via H(t) = -log S(t),
            # clamping S away from zero to avoid -log(0).
            surv_thresholded = np.maximum(surv,
                                          np.finfo(float).eps)
            cum_haz = -np.log(surv_thresholded)
            cum_hazard_scores = cum_haz.sum(axis=1)
            test_cindex = concordance_index(y_test[:, 0],
                                            -cum_hazard_scores,
                                            y_test[:, 1])
        elif cindex_method == 'median':
            predicted_medians = \