def _get_gradients(self, X, events, start, stop, weights, beta):  # pylint: disable=too-many-locals
    """
    Evaluate the Hessian, gradient and log-likelihood at ``beta``.

    The computation walks over every distinct value of ``stop[events]``. At
    each such time ``t`` the rows satisfying ``start < t <= stop`` form the
    risk set, and the rows in that set whose event flag is set and whose
    ``stop`` equals ``t`` form the tie set. Ties are handled with Efron's
    method.

    Parameters
    ----------
    X: numpy array with two dimensions; the second has length d
        one covariate row per interval.
    events: numpy array
        used directly as a mask into ``stop`` and combined with ``&``, so it
        is presumably boolean -- TODO confirm against the caller.
    start, stop: numpy arrays
        interval boundaries, compared element-wise against each event time.
    weights: numpy array
        per-row weights, masked with the same index as ``X``.
    beta: numpy array
        coefficient vector, combined with the rows of ``X`` via ``np.dot``.

    Returns
    -------
    hessian: (d, d) numpy array,
    gradient: (d,) numpy array
    log_likelihood: float
    """
    _, n_covariates = X.shape
    hessian = np.zeros((n_covariates, n_covariates))
    gradient = np.zeros(n_covariates)
    log_lik = 0

    for t in np.unique(stop[events]):
        # Rows whose (start, stop] interval contains t.
        at_risk = (start < t) & (t <= stop)
        risk_X = X[at_risk]
        risk_weights = weights[at_risk]

        # phi is the weighted exp(x . beta) of every row in the risk set.
        phi = risk_weights * np.exp(np.dot(risk_X, beta))
        phi_x = phi[:, None] * risk_X

        # Risk-set totals. The two helpers are defined elsewhere in the file;
        # going by their names they sum to a scalar / sum along axis 0.
        risk_phi = array_sum_to_scalar(phi)
        risk_phi_x = matrix_axis_0_sum_to_array(phi_x)
        risk_phi_x_x = np.dot(risk_X.T, phi_x)

        # Tie-set totals: rows in the risk set that have an event exactly at t.
        died = events[at_risk] & (stop[at_risk] == t)
        n_tied = array_sum_to_scalar(died.astype(int))  # expected to be >= 1
        died_X = risk_X[died]
        x_death_sum = matrix_axis_0_sum_to_array(risk_weights[died, None] * died_X)
        weighted_average = array_sum_to_scalar(risk_weights[died]) / n_tied

        # The original author notes that this mirrors the _batch algorithm in
        # CoxPHFitter; see _batch for further comments.
        if n_tied <= 1:
            # No tensors needed; wrap the scalar in a length-1 array so the
            # shared code after the branch treats both cases alike.
            denom = 1.0 / np.array([risk_phi])
            numer = risk_phi_x
            a1 = risk_phi_x_x * denom
        else:
            # How Efron handles ties: consider three of five subjects failing
            # at the same time. It is not known a priori who fails first, so
            # one third of (phi1 + phi2 + phi3) is removed from
            # sum_j^{5} phi_j after one failure, two thirds after the first
            # two failures, and so on.
            #
            # Written in Einstein notation for performance; the original
            # "expanded" code is at
            # https://github.com/CamDavidsonPilon/lifelines/blob/e7056e7817272eb5dff5983556954f56c33301b1/lifelines/fitters/cox_time_varying_fitter.py#L458-L490
            tie_phi = array_sum_to_scalar(phi[died])
            tie_phi_x = matrix_axis_0_sum_to_array(phi_x[died])
            tie_phi_x_x = np.dot(died_X.T, phi[died, None] * died_X)

            fractions = np.arange(n_tied) / n_tied
            denom = 1.0 / (risk_phi - fractions * tie_phi)
            numer = risk_phi_x - np.outer(fractions, tie_phi_x)

            # "i" is absent from the output subscripts, so einsum sums over it:
            # each matrix is scaled by the sum of the accompanying vector.
            a1 = np.einsum("ab, i->ab", risk_phi_x_x, denom) - np.einsum(
                "ab, i->ab", tie_phi_x_x, fractions * denom
            )

        summand = numer * denom[:, None]
        a2 = summand.T.dot(summand)

        gradient = gradient + x_death_sum - weighted_average * summand.sum(0)
        # denom holds reciprocals, so log(denom) is minus the log of the
        # (tie-adjusted) risk-set sum.
        log_lik = log_lik + np.dot(x_death_sum, beta) + weighted_average * np.log(denom).sum()
        hessian = hessian + weighted_average * (a2 - a1)

    return hessian, gradient, log_lik
def _get_gradients(self, X, events, start, stop, weights, beta):  # pylint: disable=too-many-locals
    """
    Calculate the first and second order differentials with respect to
    beta, together with the log-likelihood.

    For every unique value ``t`` of ``stop[events]`` the rows with
    ``start < t <= stop`` are selected as the risk set. Their contribution is
    accumulated into the three returned quantities, using Efron's adjustment
    when more than one row has an event at ``t``.

    Parameters
    ----------
    X: numpy array whose shape unpacks to two dimensions (rows, d)
    events: numpy array
        indexes ``stop`` directly and is and-ed with a boolean mask;
        presumably a boolean array -- verify against the caller.
    start: numpy array
    stop: numpy array
    weights: numpy array
    beta: numpy array, dotted with the rows of ``X``

    Returns
    -------
    hessian: (d, d) numpy array,
    gradient: (d,) numpy array
    log_likelihood: float
    """
    _, d = X.shape
    hessian = np.zeros((d, d))
    gradient = np.zeros(d)
    log_lik = 0
    event_times = np.unique(stop[events])

    for t in event_times:
        in_window = (start < t) & (t <= stop)

        X_t = X[in_window]
        w_t = weights[in_window]
        stop_t = stop[in_window]
        events_t = events[in_window]

        # Weighted exp(x . beta) per row, then its first and second moments.
        scores = w_t * np.exp(np.dot(X_t, beta))
        scores_x = scores[:, None] * X_t
        scores_x_x = np.dot(X_t.T, scores_x)

        # Sums over the risk set (helpers are defined elsewhere in the file).
        risk_phi = array_sum_to_scalar(scores)
        risk_phi_x = matrix_axis_0_sum_to_array(scores_x)

        # Sums over the tie set: events that stop exactly at t.
        is_death = events_t & (stop_t == t)
        death_count = array_sum_to_scalar(is_death.astype(int))  # should be at least 1
        X_deaths = X_t[is_death]
        x_death_sum = matrix_axis_0_sum_to_array(w_t[is_death, None] * X_deaths)
        weight_count = array_sum_to_scalar(w_t[is_death])
        weighted_average = weight_count / death_count

        # Near identical to the _batch algorithm in CoxPHFitter (per the
        # original author's note); see _batch for comments.
        if death_count > 1:
            # Efron's handling of ties: with k tied deaths, the j-th of them
            # (j = 0..k-1) sees the risk-set sum reduced by j/k of the tie-set
            # sum, because the order of the tied failures is unknown.
            tie_phi = array_sum_to_scalar(scores[is_death])
            tie_phi_x = matrix_axis_0_sum_to_array(scores_x[is_death])
            tie_phi_x_x = np.dot(X_deaths.T, scores[is_death, None] * X_deaths)

            step = np.arange(death_count) / death_count
            denom = 1.0 / (risk_phi - step * tie_phi)
            numer = risk_phi_x - np.outer(step, tie_phi_x)

            # einsum sums over the unlisted index "i", so each matrix ends up
            # multiplied by the total of the vector passed with it.
            risk_part = np.einsum("ab, i->ab", scores_x_x, denom)
            tie_part = np.einsum("ab, i->ab", tie_phi_x_x, step * denom)
            a1 = risk_part - tie_part
        else:
            # Single death: cast the scalar to a 1-element array so the
            # shared update code below works unchanged.
            denom = 1.0 / np.array([risk_phi])
            numer = risk_phi_x
            a1 = scores_x_x * denom

        summand = numer * denom[:, None]
        a2 = np.dot(summand.T, summand)

        # Accumulate this event time's contribution.
        score_term = weighted_average * summand.sum(0)
        gradient = gradient + x_death_sum - score_term

        linear_term = np.dot(x_death_sum, beta)
        log_risk_term = weighted_average * np.log(denom).sum()
        log_lik = log_lik + linear_term + log_risk_term

        hessian = hessian + weighted_average * (a2 - a1)

    return hessian, gradient, log_lik