def nll_pmf(phi, idx_durations, events, reduction='mean', epsilon=1e-7):
    """Negative log-likelihood for the PMF parametrized model [1].

    Arguments:
        phi {torch.tensor} -- Estimates in (-inf, inf), where pmf = somefunc(phi).
        idx_durations {torch.tensor} -- Event times represented as indices.
        events {torch.tensor} -- Indicator of event (1.) or censoring (0.).
            Same length as 'idx_durations'.
        reduction {string} -- How to reduce the loss.
            'none': No reduction.
            'mean': Mean of tensor.
            'sum': Sum of tensor.

    Returns:
        torch.tensor -- The negative log-likelihood.

    References:
    [1] Håvard Kvamme and Ørnulf Borgan. Continuous and Discrete-Time Survival Prediction
        with Neural Networks. arXiv preprint arXiv:1910.06724, 2019.
        https://arxiv.org/pdf/1910.06724.pdf
    """
    if idx_durations.max() >= phi.shape[1]:
        raise ValueError("'idx_durations' too large. Probably need to increase output size of net.")
    events = events.view(-1)
    idx_durations = idx_durations.view(-1, 1)
    phi = utils.pad_col(phi)
    gamma = phi.max(1)[0]
    cumsum = phi.sub(gamma.view(-1, 1)).exp().cumsum(1)
    sum_ = cumsum[:, -1]
    part1 = phi.gather(1, idx_durations).view(-1).sub(gamma).mul(events)
    part2 = - sum_.relu().add(epsilon).log()
    # The relu() in part3 (and possibly part2) is needed because cumsum on the GPU
    # can produce small negative numbers, and log of a negative number gives nan.
    part3 = sum_.sub(cumsum.gather(1, idx_durations).view(-1)).relu().add(epsilon).log().mul(1. - events)
    loss = - part1.add(part2).add(part3)
    return _reduction(loss, reduction)
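
# A minimal usage sketch of `nll_pmf` with hypothetical toy tensors (assumes the
# import path matches a pycox-style installation):
#
#     import torch
#     from pycox.models.loss import nll_pmf
#
#     phi = torch.randn(4, 5)                     # unconstrained net outputs, [batch, n_durations]
#     idx_durations = torch.tensor([0, 2, 4, 1])  # event/censoring times as interval indices
#     events = torch.tensor([1., 0., 1., 1.])     # 1. = event, 0. = censored
#
#     loss = nll_pmf(phi, idx_durations, events)  # scalar, since reduction='mean'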
def _hazard_const_haz(self, input, batch_size=8224, numpy=None, eval_=True, to_cpu=False,
                      num_workers=0):
    r"""Computes the continuous-time constant hazard interpolation.

    Essentially, we want the discrete survival estimates to match the continuous-time
    survival at the knots, i.e.,

    $$S(\tau_j) = \prod_{k=1}^{j} [1 - h_k] = \prod_{k=1}^{j} \exp[-\eta_k],$$

    where $h_k$ are the discrete hazard estimates and $\eta_k$ the continuous-time
    hazards multiplied by the length of the duration interval, as defined for the
    PC-Hazard method. Thus we get

    $$\eta_k = -\log[1 - h_k],$$

    which can be divided by the length of the time interval to obtain the
    continuous-time hazards.
    """
    haz_orig = self.model.predict_hazard(input, batch_size, False, eval_, to_cpu, num_workers)
    haz = (1 - haz_orig).add(self.epsilon).log().mul(-1).relu()[:, 1:].contiguous()
    n = haz.shape[0]
    # Spread each hazard over `sub` sub-intervals, preserving the total per interval.
    haz = haz.view(-1, 1).repeat(1, self.sub).view(n, -1).div(self.sub)
    haz = utils.pad_col(haz, where='start')
    haz[:, 0] = haz_orig[:, 0]
    return tt.utils.array_or_tensor(haz, numpy, input)
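
# A small numeric check of the identity above (a sketch, not part of the library):
# with discrete hazards h_k in [0, 1), eta_k = -log(1 - h_k) yields the same survival
# curve whether computed as a product or as exp of a negative cumulative sum.
#
#     import torch
#
#     h = torch.tensor([0.10, 0.25, 0.05])            # discrete hazard estimates h_k
#     eta = (1 - h).log().mul(-1)                     # eta_k = -log(1 - h_k)
#     surv_discrete = (1 - h).cumprod(0)              # prod_{k<=j} (1 - h_k)
#     surv_continuous = eta.cumsum(0).mul(-1).exp()   # exp(-sum_{k<=j} eta_k)
#     assert torch.allclose(surv_discrete, surv_continuous)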
def predict_pmf(self, input, batch_size=8224, numpy=None, eval_=True, to_cpu=False,
                num_workers=0):
    """Predict the probability mass function (PMF) for `input`.

    Arguments:
        input {tuple, np.ndarray, or torch.tensor} -- Input to net.

    Keyword Arguments:
        batch_size {int} -- Batch size (default: {8224})
        numpy {bool} -- 'False' gives tensor, 'True' gives numpy, and None gives same as input
            (default: {None})
        eval_ {bool} -- If 'True', use 'eval' mode on net. (default: {True})
        to_cpu {bool} -- For larger data sets we need to move the results to cpu
            (default: {False})
        num_workers {int} -- Number of workers in created dataloader (default: {0})

    Returns:
        [np.ndarray or tensor] -- Predictions
    """
    preds = self.predict(input, batch_size, False, eval_, False, to_cpu, num_workers)
    pmf = pad_col(preds).softmax(1)[:, :-1]
    return array_or_tensor(pmf, numpy, input)
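
# Sketch of the pad-and-softmax construction used above (assumes pad_col from
# pycox.models.utils): an extra column is appended before the softmax, so each row
# of the resulting PMF sums to less than 1; the dropped last column carries the
# probability of surviving past the final time point.
#
#     import torch
#     from pycox.models.utils import pad_col
#
#     preds = torch.randn(2, 4)                   # raw network outputs
#     pmf = pad_col(preds).softmax(1)[:, :-1]     # shape [2, 4], row sums < 1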
def nll_pmf_cr(phi, idx_durations, events, reduction='mean', epsilon=1e-7):
    """Negative log-likelihood for the PMF parametrization with competing risks.
    `phi` is the 'logit'.

    Arguments:
        phi {torch.tensor} -- Predictions as float tensor with shape
            [batch, n_risks, n_durations], all in (-inf, inf).
        idx_durations {torch.tensor} -- Int tensor with index of durations.
        events {torch.tensor} -- Int tensor with event types.
            {0: Censored, 1: first group, ..., n_risks: n'th risk group}.

    Keyword Arguments:
        reduction {string} -- How to reduce the loss.
            'none': No reduction.
            'mean': Mean of tensor.
            'sum': Sum of tensor.

    Returns:
        torch.tensor -- Negative log-likelihood.
    """
    # Should improve numerical stability by, e.g., the log-sum-exp trick.
    events = events.view(-1) - 1
    event_01 = (events != -1).float()
    idx_durations = idx_durations.view(-1)
    batch_size = phi.size(0)
    sm = utils.pad_col(phi.view(batch_size, -1)).softmax(1)[:, :-1].view(phi.shape)
    index = torch.arange(batch_size)
    part1 = sm[index, events, idx_durations].relu().add(epsilon).log().mul(event_01)
    part2 = (1 - sm.cumsum(2)[index, :, idx_durations].sum(1)).relu().add(epsilon).log().mul(1 - event_01)
    loss = - part1.add(part2)
    return _reduction(loss, reduction)
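
# A minimal competing-risks sketch with hypothetical toy tensors (assumes the
# import path matches a pycox-style installation):
#
#     import torch
#     from pycox.models.loss import nll_pmf_cr
#
#     phi = torch.randn(4, 2, 3)                  # [batch, n_risks, n_durations]
#     idx_durations = torch.tensor([0, 2, 1, 2])
#     events = torch.tensor([1, 0, 2, 1])         # 0 = censored; 1 and 2 = risk types
#
#     loss = nll_pmf_cr(phi, idx_durations, events)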
def predict_hazard(self, input, batch_size=8224, numpy=None, eval_=True, to_cpu=False,
                   num_workers=0):
    """Predict the hazard function for `input`.

    Arguments:
        input {tuple, np.ndarray, or torch.tensor} -- Input to net.

    Keyword Arguments:
        batch_size {int} -- Batch size (default: {8224})
        numpy {bool} -- 'False' gives tensor, 'True' gives numpy, and None gives same as input
            (default: {None})
        eval_ {bool} -- If 'True', use 'eval' mode on net. (default: {True})
        to_cpu {bool} -- For larger data sets we need to move the results to cpu
            (default: {False})
        num_workers {int} -- Number of workers in created dataloader (default: {0})

    Returns:
        [np.ndarray or tensor] -- Predicted hazards
    """
    preds = self.predict(input, batch_size, False, eval_, False, to_cpu, num_workers)
    n = preds.shape[0]
    # softplus maps the unconstrained outputs to non-negative hazards; each hazard is
    # then spread over `sub` sub-intervals, preserving the total per interval.
    hazard = F.softplus(preds).view(-1, 1).repeat(1, self.sub).view(n, -1).div(self.sub)
    hazard = pad_col(hazard, where='start')
    return tt.utils.array_or_tensor(hazard, numpy, input)
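
# Sketch of the sub-interval spreading used above: repeating each hazard `sub` times
# and dividing by `sub` preserves the cumulative hazard per original interval.
#
#     import torch
#
#     sub = 2
#     haz = torch.tensor([[1.0, 3.0]])             # hazards per interval
#     haz_sub = haz.view(-1, 1).repeat(1, sub).view(1, -1).div(sub)
#     assert torch.allclose(haz_sub.view(1, -1, sub).sum(2), haz)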
def rank_loss_deephit_cr(phi: Tensor, idx_durations: Tensor, events: Tensor, rank_mat: Tensor,
                         sigma: float, reduction: str = 'mean') -> Tensor:
    """Rank loss proposed by DeepHit authors for competing risks [1].

    Arguments:
        phi {torch.tensor} -- Predictions as float tensor with shape
            [batch, n_risks, n_durations], all in (-inf, inf).
        idx_durations {torch.tensor} -- Int tensor with index of durations.
        events {torch.tensor} -- Int tensor with event types.
            {0: Censored, 1: first group, ..., n_risks: n'th risk group}.
        rank_mat {torch.tensor} -- See pair_rank_mat function.
        sigma {float} -- Sigma from DeepHit paper, chosen by you.

    Keyword Arguments:
        reduction {string} -- How to reduce the loss.
            'none': No reduction.
            'mean': Mean of tensor.
            'sum': Sum of tensor.

    Returns:
        torch.tensor -- Rank loss.

    References:
    [1] Changhee Lee, William R Zame, Jinsung Yoon, and Mihaela van der Schaar. Deephit: A deep
        learning approach to survival analysis with competing risks. In Thirty-Second AAAI
        Conference on Artificial Intelligence, 2018.
        http://medianetlab.ee.ucla.edu/papers/AAAI_2018_DeepHit
    """
    idx_durations = idx_durations.view(-1)
    events = events.view(-1) - 1
    event_01 = (events == -1).float()
    batch_size, n_risks = phi.shape[:2]
    pmf = utils.pad_col(phi.view(batch_size, -1)).softmax(1)
    pmf = pmf[:, :-1].view(phi.shape)
    y = torch.zeros_like(pmf)
    y[torch.arange(batch_size), :, idx_durations] = 1.
    loss = []
    for i in range(n_risks):
        rank_loss_i = _rank_loss_deephit(pmf[:, i, :], y[:, i, :], rank_mat, sigma, 'none')
        loss.append(rank_loss_i.view(-1) * (events == i).float())
    if reduction == 'none':
        return sum(loss)
    elif reduction == 'mean':
        return sum([lo.mean() for lo in loss])
    elif reduction == 'sum':
        return sum([lo.sum() for lo in loss])
    return _reduction(loss, reduction)
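
# A hypothetical usage sketch; assumes pair_rank_mat from pycox.models.data, which
# builds the pairwise comparability matrix from numpy arrays:
#
#     import numpy as np
#     import torch
#     from pycox.models.data import pair_rank_mat
#     from pycox.models.loss import rank_loss_deephit_cr
#
#     idx_durations = np.array([0, 2, 1, 2])
#     events = np.array([1, 0, 2, 1])            # 0 = censored; 1 and 2 = risk types
#     rank_mat = torch.from_numpy(pair_rank_mat(idx_durations, events))
#
#     phi = torch.randn(4, 2, 3)                 # [batch, n_risks, n_durations]
#     loss = rank_loss_deephit_cr(phi, torch.from_numpy(idx_durations),
#                                 torch.from_numpy(events), rank_mat, sigma=0.1)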
def predict_pmf(self, input, batch_size=8224, numpy=None, eval_=True, to_cpu=False,
                num_workers=0):
    """Predict the probability mass function (PMF) for `input`."""
    preds = self.predict(input, batch_size, False, eval_, False, to_cpu, num_workers)
    pmf = pad_col(preds).softmax(1)[:, :-1]
    return tt.utils.array_or_tensor(pmf, numpy, input)
def _surv_const_haz(self, input, batch_size=8224, numpy=None, eval_=True, to_cpu=False,
                    num_workers=0):
    """Survival estimates from the constant-hazard interpolation."""
    haz = self._hazard_const_haz(input, batch_size, False, eval_, to_cpu, num_workers)
    surv_0 = 1 - haz[:, :1]
    surv = utils.pad_col(haz[:, 1:], where='start').cumsum(1).mul(-1).exp().mul(surv_0)
    return tt.utils.array_or_tensor(surv, numpy, input)
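
# Sketch of the survival identity used above: with piecewise-constant hazards eta_k,
# the survival at knot j is S(tau_j) = exp(-sum_{k<=j} eta_k).
#
#     import torch
#
#     eta = torch.tensor([[0.10, 0.20, 0.30]])
#     surv = eta.cumsum(1).mul(-1).exp()
#     # tensor([[0.9048, 0.7408, 0.5488]])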
def nll_pc_hazard_loss(phi: Tensor, idx_durations: Tensor, events: Tensor, interval_frac: Tensor,
                       reduction: str = 'mean') -> Tensor:
    """Negative log-likelihood of the PC-Hazard parametrization model [1].

    Arguments:
        phi {torch.tensor} -- Estimates in (-inf, inf), where hazard = softplus(phi).
        idx_durations {torch.tensor} -- Event times represented as indices.
        events {torch.tensor} -- Indicator of event (1.) or censoring (0.).
            Same length as 'idx_durations'.
        interval_frac {torch.tensor} -- Fraction of last interval before event/censoring.
        reduction {string} -- How to reduce the loss.
            'none': No reduction.
            'mean': Mean of tensor.
            'sum': Sum of tensor.

    Returns:
        torch.tensor -- The negative log-likelihood.

    References:
    [1] Håvard Kvamme and Ørnulf Borgan. Continuous and Discrete-Time Survival Prediction
        with Neural Networks. arXiv preprint arXiv:1910.06724, 2019.
        https://arxiv.org/pdf/1910.06724.pdf
    """
    if events.dtype is torch.bool:
        events = events.float()
    idx_durations = idx_durations.view(-1, 1)
    events = events.view(-1)
    interval_frac = interval_frac.view(-1)

    # Drop rows with negative duration indices, which carry no likelihood contribution.
    keep = idx_durations.view(-1) >= 0
    phi = phi[keep, :]
    idx_durations = idx_durations[keep, :]
    events = events[keep]
    interval_frac = interval_frac[keep]

    # log_h_e = F.softplus(phi.gather(1, idx_durations).view(-1)).log().mul(events)
    log_h_e = utils.log_softplus(phi.gather(1, idx_durations).view(-1)).mul(events)
    haz = F.softplus(phi)
    scaled_h_e = haz.gather(1, idx_durations).view(-1).mul(interval_frac)
    haz = utils.pad_col(haz, where='start')
    sum_haz = haz.cumsum(1).gather(1, idx_durations).view(-1)
    loss = - log_h_e.sub(scaled_h_e).sub(sum_haz)
    return _reduction(loss, reduction)
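
# A minimal usage sketch of `nll_pc_hazard_loss` with hypothetical toy tensors
# (assumes the import path matches a pycox-style installation):
#
#     import torch
#     from pycox.models.loss import nll_pc_hazard_loss
#
#     phi = torch.randn(3, 4)                        # [batch, n_intervals]
#     idx_durations = torch.tensor([1, 3, 0])        # interval index of event/censoring
#     events = torch.tensor([1., 1., 0.])
#     interval_frac = torch.tensor([0.5, 0.2, 0.9])  # fraction of the final interval survived
#
#     loss = nll_pc_hazard_loss(phi, idx_durations, events, interval_frac)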
def predict_pmf(self, input, batch_size=8224, numpy=None, eval_=True, to_cpu=False,
                num_workers=0):
    """Predict the PMF under the chosen interpolation scheme."""
    if self.scheme not in ['const_pdf', 'lin_surv']:
        raise NotImplementedError
    pmf = self.model.predict_pmf(input, batch_size, False, eval_, to_cpu, num_workers)
    n, m = pmf.shape
    # Spread the mass of each interval (except the first) evenly over `sub` sub-intervals.
    pmf_cdi = pmf[:, 1:].contiguous().view(-1, 1).repeat(1, self.sub).div(self.sub).view(n, -1)
    pmf_cdi = utils.pad_col(pmf_cdi, where='start')
    pmf_cdi[:, 0] = pmf[:, 0]
    return tt.utils.array_or_tensor(pmf_cdi, numpy, input)
def nll_pmf(phi: Tensor, idx_durations: Tensor, events: Tensor, reduction: str = 'mean',
            epsilon: float = 1e-7) -> Tensor:
    """Negative log-likelihood for the PMF parametrized model [1].

    Arguments:
        phi {torch.tensor} -- Estimates in (-inf, inf), where pmf = somefunc(phi).
        idx_durations {torch.tensor} -- Event times represented as indices.
        events {torch.tensor} -- Indicator of event (1.) or censoring (0.).
            Same length as 'idx_durations'.
        reduction {string} -- How to reduce the loss.
            'none': No reduction.
            'mean': Mean of tensor.
            'sum': Sum of tensor.

    Returns:
        torch.tensor -- The negative log-likelihood.

    References:
    [1] Håvard Kvamme and Ørnulf Borgan. Continuous and Discrete-Time Survival Prediction
        with Neural Networks. arXiv preprint arXiv:1910.06724, 2019.
        https://arxiv.org/pdf/1910.06724.pdf
    """
    if phi.shape[1] <= idx_durations.max():
        raise ValueError("Network output `phi` is too small for `idx_durations`."
                         f" Need at least `phi.shape[1] = {idx_durations.max().item() + 1}`,"
                         f" but got `phi.shape[1] = {phi.shape[1]}`")
    if events.dtype is torch.bool:
        events = events.float()
    events = events.view(-1)
    idx_durations = idx_durations.view(-1, 1)
    phi = utils.pad_col(phi)
    gamma = phi.max(1)[0]
    cumsum = phi.sub(gamma.view(-1, 1)).exp().cumsum(1)
    sum_ = cumsum[:, -1]
    part1 = phi.gather(1, idx_durations).view(-1).sub(gamma).mul(events)
    part2 = - sum_.relu().add(epsilon).log()
    # The relu() in part3 (and possibly part2) is needed because cumsum on the GPU
    # can produce small negative numbers, and log of a negative number gives nan.
    part3 = sum_.sub(cumsum.gather(1, idx_durations).view(-1)).relu().add(epsilon).log().mul(1. - events)
    loss = - part1.add(part2).add(part3)
    return _reduction(loss, reduction)
def rank_loss_deephit_single(phi: Tensor, idx_durations: Tensor, events: Tensor, rank_mat: Tensor,
                             sigma: float, reduction: str = 'mean') -> Tensor:
    """Rank loss proposed by DeepHit authors [1] for a single risk.

    Arguments:
        phi {torch.tensor} -- Predictions as float tensor with shape [batch, n_durations],
            all in (-inf, inf).
        idx_durations {torch.tensor} -- Int tensor with index of durations.
        events {torch.tensor} -- Float indicator of event or censoring (1 is event).
        rank_mat {torch.tensor} -- See pair_rank_mat function.
        sigma {float} -- Sigma from DeepHit paper, chosen by you.

    Keyword Arguments:
        reduction {string} -- How to reduce the loss.
            'none': No reduction.
            'mean': Mean of tensor.
            'sum': Sum of tensor.

    Returns:
        torch.tensor -- Rank loss.

    References:
    [1] Changhee Lee, William R Zame, Jinsung Yoon, and Mihaela van der Schaar. Deephit: A deep
        learning approach to survival analysis with competing risks. In Thirty-Second AAAI
        Conference on Artificial Intelligence, 2018.
        http://medianetlab.ee.ucla.edu/papers/AAAI_2018_DeepHit
    """
    idx_durations = idx_durations.view(-1, 1)
    # events = events.float().view(-1)
    pmf = utils.pad_col(phi).softmax(1)
    y = torch.zeros_like(pmf).scatter(1, idx_durations, 1.)  # one-hot
    rank_loss = _rank_loss_deephit(pmf, y, rank_mat, sigma, reduction)
    return rank_loss
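
# Sketch of the one-hot construction used above: scatter writes 1. at each duration
# index along dim 1.
#
#     import torch
#
#     idx_durations = torch.tensor([[2], [0], [1]])
#     y = torch.zeros(3, 4).scatter(1, idx_durations, 1.)
#     # tensor([[0., 0., 1., 0.],
#     #         [1., 0., 0., 0.],
#     #         [0., 1., 0., 0.]])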
@pytest.mark.parametrize('val', [0, 1, 5])  # example values; `val` is supplied by pytest
def test_pad_col_start(val):
    x = torch.ones((2, 3))
    x_pad = pad_col(x, val, where='start')
    pad = torch.ones(2, 1) * val
    assert (x_pad == torch.cat([pad, x], dim=1)).all()
@pytest.mark.parametrize('val', [0, 1, 5])  # example values; `val` is supplied by pytest
def test_pad_col_end(val):
    x = torch.ones((2, 3))
    x_pad = pad_col(x, val)
    pad = torch.ones(2, 1) * val
    assert (x_pad == torch.cat([x, pad], dim=1)).all()
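
# For reference, pad_col adds a constant column to a 2-d tensor (a usage sketch,
# assuming pad_col from pycox.models.utils with defaults val=0, where='end'):
#
#     import torch
#     from pycox.models.utils import pad_col
#
#     x = torch.ones(2, 3)
#     pad_col(x).shape                  # torch.Size([2, 4]); zeros appended at the end
#     pad_col(x, where='start').shape   # torch.Size([2, 4]); zeros prepended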