Exemplo n.º 1
0
# Match genotype samples against phenotype records by subject ID.
# NOTE(review): intersect_mtlb presumably mirrors MATLAB's intersect (common
# values plus index vectors into each input) -- confirm against its definition.
c, ia, ib = intersect_mtlb(fam['iid'], pheno['IID'])
logger.info(f"{len(ia)} subjects found to have genotype data\n")

# Final sample assignment: restrict phenotypes (rows, via ib) and genotypes
# (columns, via ia) to the matched subjects; the phenotype index is renumbered
# from 0 so later positional lookups line up.
pheno = pheno.iloc[ib].reset_index(drop=True)
geno_ia = geno[:, ia]

# Null model: Cox proportional hazards fit on the covariates only.
logger.info('Generating Null models\n')
_null_cols = [T_name, event_name] + covname
cph = CoxPHFitter()
cph.fit(pheno[_null_cols], duration_col=T_name, event_col=event_name)
# Martingale residuals of the null fit, restored to subject (index) order.
# 'deviance' residuals were the alternative considered here.
res_surv = (
    cph.compute_residuals(pheno[_null_cols], kind='martingale')
    .sort_index()['martingale']
)

# NOTE: this is the most memory-intensive step of the script; it may need to
# be reworked when dealing with biobank-scale data.

if args.apr_flag == 'N':
  logger.info('Calculating Null covariance matrix\n')
  # Predicted cumulative hazard from the null model: one row per time point
  # (mat.index), one column per subject.
  mat = cph.predict_cumulative_hazard(pheno)
  # Increments of the cumulative hazard between consecutive time points.
  P = np.diff(mat, axis=0)
  obs_times = pheno[T_name]
  for subj in range(P.shape[1]):
    # Row of the time grid closest to this subject's own time; increments
    # from that row onward are zeroed out.
    nearest = np.abs(mat.index - obs_times[subj]).argmin()
    P[nearest:, subj] = 0
  # V = diag(event - martingale residual) - P'P, square in the number of
  # subjects.
  diag_terms = np.array(pheno[event_name] - res_surv)
  V = da.diag(diag_terms) - da.dot(P.transpose(), P)
  X = np.array(pheno[covname])
  # C = V - V X (X' V X)^-1 X' V, built lazily with dask.
  VX = da.matmul(V, X)
  XtV = da.matmul(X.transpose(), V)
  XtVX_inv = da.linalg.inv(da.matmul(XtV, X))
  C = V - da.matmul(da.matmul(VX, XtVX_inv), XtV)
else:
  logger.info('Using first order approximations for testing statistics\n')