def _index_tensor(x: Tensor, item: Any) -> Tensor:
    """
    Emulates indexing of a tensor with a tuple of ints, slices and Ellipsis,
    using `slice_axis` and `squeeze`.
    """
    squeeze: List[int] = []
    if not isinstance(item, tuple):
        item = (item,)
    saw_ellipsis = False
    for i, item_i in enumerate(item):
        # once an Ellipsis was seen, the remaining items count from the end
        axis = i - len(item) if saw_ellipsis else i
        if isinstance(item_i, int):
            if item_i != -1:
                x = x.slice_axis(axis=axis, begin=item_i, end=item_i + 1)
            else:
                x = x.slice_axis(axis=axis, begin=-1, end=None)
            squeeze.append(axis)
        elif item_i == slice(None):
            continue
        elif item_i == Ellipsis:
            saw_ellipsis = True
            continue
        elif isinstance(item_i, slice):
            assert item_i.step is None
            start = item_i.start if item_i.start is not None else 0
            x = x.slice_axis(axis=axis, begin=start, end=item_i.stop)
        else:
            raise RuntimeError(f"invalid indexing item: {item}")
    if len(squeeze):
        x = x.squeeze(axis=tuple(squeeze))
    return x
def __init__(
    self,
    alpha: Tensor,
    beta: Tensor,
    zero_probability: Tensor,
    one_probability: Tensor,
) -> None:
    F = getF(alpha)
    self.alpha = alpha
    self.beta = beta
    self.zero_probability = zero_probability
    self.one_probability = one_probability
    self.beta_probability = 1 - zero_probability - one_probability
    self.beta_distribution = Beta(alpha=alpha, beta=beta)
    mixture_probs = F.stack(
        zero_probability, one_probability, self.beta_probability, axis=-1
    )
    super().__init__(
        components=[
            Deterministic(alpha.zeros_like()),
            Deterministic(alpha.ones_like()),
            self.beta_distribution,
        ],
        mixture_probs=mixture_probs,
    )
def _compute_edges(F, bin_centers: Tensor) -> Tensor:
    r"""
    Computes the edges of the bins based on the centers. The first and last
    edge are set to :math:`-10^{10}` and :math:`10^{10}`, respectively.

    Parameters
    ----------
    F
    bin_centers
        Tensor of shape `(*batch_shape, num_bins)`.

    Returns
    -------
    Tensor
        Tensor of shape `(*batch_shape, num_bins + 1)`.
    """
    low = (
        F.zeros_like(bin_centers.slice_axis(axis=-1, begin=0, end=1)) - 1.0e10
    )
    high = (
        F.zeros_like(bin_centers.slice_axis(axis=-1, begin=0, end=1)) + 1.0e10
    )
    means = (
        F.broadcast_add(
            bin_centers.slice_axis(axis=-1, begin=1, end=None),
            bin_centers.slice_axis(axis=-1, begin=0, end=-1),
        )
        / 2.0
    )
    return F.concat(low, means, high, dim=-1)
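# Illustration only (not part of the library): the same edge construction,
# written with NumPy; the helper name `compute_edges_np` is ours.
import numpy as np

def compute_edges_np(bin_centers: np.ndarray) -> np.ndarray:
    # midpoints between consecutive centers, padded with -1e10 / +1e10
    means = (bin_centers[..., 1:] + bin_centers[..., :-1]) / 2.0
    low = np.full_like(bin_centers[..., :1], -1.0e10)
    high = np.full_like(bin_centers[..., :1], 1.0e10)
    return np.concatenate([low, means, high], axis=-1)

# compute_edges_np(np.array([1.0, 2.0, 4.0])) -> [-1e10, 1.5, 3.0, 1e10]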
def fit(cls, F, samples: Tensor, rank: int = 0) -> Distribution:
    """
    Returns an instance of `LowrankMultivariateGaussian` after fitting
    parameters to the given data. Only the special case of `rank` = 0 is
    supported at the moment.

    Parameters
    ----------
    F
    samples
        Tensor of shape (num_samples, batch_size, seq_len, target_dim).
    rank
        Rank of W.

    Returns
    -------
    Distribution
        Instance of type `LowrankMultivariateGaussian`.
    """
    # TODO: Implement it for the general case: `rank` > 0
    assert rank == 0, "Fit is only implemented for the case rank = 0!"

    # Compute mean and variances
    mu = samples.mean(axis=0)
    var = F.square(samples - samples.mean(axis=0)).mean(axis=0)

    return cls(dim=samples.shape[-1], rank=rank, mu=mu, D=var)
def s(mu: Tensor, D: Tensor, W: Tensor) -> Tensor:
    F = getF(mu)

    samples_D = F.sample_normal(
        mu=F.zeros_like(mu), sigma=F.ones_like(mu), dtype=dtype
    )
    cov_D = D.sqrt() * samples_D

    # dummy tensor, only used to obtain the shape (..., rank)
    dummy_tensor = F.linalg_gemm2(
        W, mu.expand_dims(axis=-1), transpose_a=True
    ).squeeze(axis=-1)

    samples_W = F.sample_normal(
        mu=F.zeros_like(dummy_tensor),
        sigma=F.ones_like(dummy_tensor),
        dtype=dtype,
    )

    cov_W = F.linalg_gemm2(W, samples_W.expand_dims(axis=-1)).squeeze(axis=-1)

    samples = mu + cov_D + cov_W
    return samples
def quantile(self, level: Tensor) -> Tensor:
    F = self.F
    for _ in range(self.all_dim):
        level = level.expand_dims(axis=-1)

    # inverse CDF of the Laplace distribution:
    # Q(p) = mu + b * log(2p)       for p <= 0.5
    # Q(p) = mu - b * log(2 - 2p)   for p >  0.5
    condition = F.broadcast_greater(level, level.zeros_like() + 0.5)
    u = F.where(condition, -F.log(2.0 - 2.0 * level), F.log(2.0 * level))

    return F.broadcast_add(self.mu, F.broadcast_mul(self.b, u))
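# Sanity check (ours, not part of the library): the two branches above are the
# piecewise inverse CDF of the Laplace distribution and can be verified
# against scipy.stats.laplace.ppf.
import numpy as np
from scipy.stats import laplace

mu_np, b_np = 1.5, 2.0
level_np = np.array([0.1, 0.25, 0.5, 0.75, 0.9])
u_np = np.where(
    level_np > 0.5, -np.log(2.0 - 2.0 * level_np), np.log(2.0 * level_np)
)
assert np.allclose(mu_np + b_np * u_np, laplace.ppf(level_np, loc=mu_np, scale=b_np))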
def compute_scale(
    self, F, data: Tensor, observed_indicator: Tensor
) -> Tensor:
    """
    Parameters
    ----------
    F
        A module that can either refer to the Symbol API or the NDArray
        API in MXNet.
    data
        Tensor containing the data to be scaled.
    observed_indicator
        Binary tensor with the same shape as ``data``, that has 1 in
        correspondence of observed data points, and 0 in correspondence of
        missing data points.

    Returns
    -------
    Tensor
        Tensor of shape (N, T, C) or (N, C, T), scaled along the specified
        axis.
    """
    # along the specified axis, which slices consist entirely of zeros
    axis_zero = nd.prod(data == data.zeros_like(), self.axis, keepdims=True)
    # broadcast it to the shape of data
    axis_zero = nd.broadcast_to(axis_zero, shape=data.shape)

    # min value along the specified axis, ignoring entries that are not
    # observed according to observed_indicator
    min_val = nd.where(
        1 - observed_indicator,
        nd.broadcast_to(data.max(keepdims=True), shape=data.shape),
        data,
    ).min(axis=self.axis, keepdims=True)

    # max value along the specified axis, ignoring entries that are not
    # observed according to observed_indicator
    max_val = nd.where(
        1 - observed_indicator,
        nd.broadcast_to(data.min(keepdims=True), shape=data.shape),
        data,
    ).max(axis=self.axis, keepdims=True)

    # rescale to [0, 1]
    scaled_data = (data - min_val) / (max_val - min_val)

    # set the result to zero where the data was all zeros along the axis
    scaled_data = nd.where(axis_zero, scaled_data.zeros_like(), scaled_data)

    # set the remaining NaN values to one;
    # scaled_data != scaled_data is True exactly at the NaN entries
    scaled_data = nd.where(
        scaled_data != scaled_data, scaled_data.ones_like(), scaled_data
    )

    # replace unobserved entries with zero, as indicated by observed_indicator
    return nd.where(
        1 - observed_indicator, scaled_data.zeros_like(), scaled_data
    )
def exact_inference(
    self, x_train: Tensor, y_train: Tensor, x_test: Tensor
) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Parameters
    ----------
    x_train
        Training set of features of shape
        (batch_size, context_length, num_features).
    y_train
        Training labels of shape (batch_size, context_length).
    x_test
        Test set of features of shape
        (batch_size, prediction_length, num_features).

    Returns
    -------
    Tuple
        Tensor
            Predictive GP samples of shape
            (batch_size, prediction_length, num_samples).
        Tensor
            Predictive mean of the GP of shape
            (batch_size, prediction_length).
        Tensor
            Predictive standard deviation of the GP of shape
            (batch_size, prediction_length).
    """
    assert (
        self.context_length is not None
    ), "The value of `context_length` must be set."
    assert (
        self.prediction_length is not None
    ), "The value of `prediction_length` must be set."

    # compute Cholesky factorization of the training kernel matrix
    l_train = self._compute_cholesky_gp(
        self.kernel.kernel_matrix(x_train, x_train), self.context_length
    )

    lower_tri_solve = self.F.linalg.trsm(
        l_train, self.kernel.kernel_matrix(x_train, x_test)
    )
    predictive_mean = self.F.linalg.gemm2(
        lower_tri_solve,
        self.F.linalg.trsm(l_train, y_train.expand_dims(axis=-1)),
        transpose_a=True,
    ).squeeze(axis=-1)

    # the second term can be rewritten as
    # :math:`||L^{-1} K(x_{train}, x_{test})||_2^2`,
    # so only one triangular system has to be solved
    predictive_covariance = self.kernel.kernel_matrix(
        x_test, x_test
    ) - self.F.linalg.gemm2(
        lower_tri_solve, lower_tri_solve, transpose_a=True
    )

    # extract diagonal entries of the covariance matrix
    predictive_std = batch_diagonal(
        self.F,
        predictive_covariance,
        self.prediction_length,
        self.float_type,
    )

    # if self.sample_noise is True, the predictive covariance has sigma^2 on the diagonal
    if self.sample_noise:
        predictive_std = self.F.broadcast_add(predictive_std, self.sigma**2)
    predictive_std = self.F.sqrt(predictive_std).squeeze(axis=-1)

    # compute samples from the GP predictive distribution
    return (
        self.sample(predictive_mean, predictive_covariance),
        predictive_mean,
        predictive_std,
    )
def compute_scale(
    self, F, data: Tensor, observed_indicator: Tensor  # shapes (N, T, C)
) -> Tensor:
    # these will have shape (N, C)
    num_observed = F.sum(observed_indicator, axis=1)
    sum_observed = (data.abs() * observed_indicator).sum(axis=1)

    # first compute a global scale per-dimension
    total_observed = num_observed.sum(axis=0)
    denominator = F.maximum(total_observed, 1.0)
    default_scale = sum_observed.sum(axis=0) / denominator  # shape (C, )

    # then compute a per-item, per-dimension scale
    denominator = F.maximum(num_observed, 1.0)
    scale = sum_observed / denominator  # shape (N, C)

    # use per-batch scale when no element is observed
    # or when the sequence contains only zeros
    cond = F.broadcast_greater(sum_observed, F.zeros_like(sum_observed))
    scale = F.where(
        cond,
        scale,
        F.broadcast_mul(default_scale, F.ones_like(num_observed)),
    )

    return F.maximum(scale, self.scale_min)
def lowrank_log_likelihood(
    rank: int, mu: Tensor, D: Tensor, W: Tensor, x: Tensor
) -> Tensor:
    F = getF(mu)

    dim = F.ones_like(mu).sum(axis=-1).max()
    dim_factor = dim * math.log(2 * math.pi)

    if W is not None:
        batch_capacitance_tril = capacitance_tril(F=F, rank=rank, W=W, D=D)
        log_det_factor = log_det(
            F=F, batch_D=D, batch_capacitance_tril=batch_capacitance_tril
        )
        mahalanobis_factor = mahalanobis_distance(
            F=F, W=W, D=D, capacitance_tril=batch_capacitance_tril, x=x - mu
        )
    else:
        log_det_factor = D.log().sum(axis=-1)
        x_centered = x - mu
        mahalanobis_factor = F.broadcast_div(
            x_centered.square(), D
        ).sum(axis=-1)

    ll: Tensor = -0.5 * (
        F.broadcast_add(dim_factor, log_det_factor) + mahalanobis_factor
    )

    return ll
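# Reference check (ours, illustration only): the quantity computed above is
# the ordinary multivariate Gaussian log-density with covariance
# Sigma = W W^T + diag(D); a dense NumPy/SciPy version, useful only for testing:
import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
dim_np, rank_np = 4, 2
mu_np = rng.normal(size=dim_np)
D_np = rng.uniform(0.5, 1.5, size=dim_np)      # diagonal part of the covariance
W_np = rng.normal(size=(dim_np, rank_np))      # low-rank factor
x_np = rng.normal(size=dim_np)

sigma_np = W_np @ W_np.T + np.diag(D_np)
diff = x_np - mu_np
ll_direct = -0.5 * (
    dim_np * np.log(2 * np.pi)
    + np.linalg.slogdet(sigma_np)[1]
    + diff @ np.linalg.solve(sigma_np, diff)
)
assert np.isclose(
    ll_direct, multivariate_normal(mean=mu_np, cov=sigma_np).logpdf(x_np)
)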
def log_prob(self, x: Tensor) -> Tensor:
    F = self.F

    # masking data NaN's with ones to prevent NaN gradients
    x_non_nan = F.where(x != x, F.ones_like(x), x)

    # calculate likelihood for values which are not NaN
    non_nan_dist_log_likelihood = F.where(
        x != x,
        -x.ones_like() / 0.0,
        self.components[0].log_prob(x_non_nan),
    )

    log_mix_weights = F.log(self.mixture_probs)

    # stack log probabilities of components
    component_log_likelihood = F.stack(
        *[non_nan_dist_log_likelihood, self.components[1].log_prob(x)],
        axis=-1,
    )

    # compute mixture log probability by log-sum-exp
    summands = log_mix_weights + component_log_likelihood
    max_val = F.max_axis(summands, axis=-1, keepdims=True)
    sum_exp = F.sum(
        F.exp(F.broadcast_minus(summands, max_val)), axis=-1, keepdims=True
    )
    log_sum_exp = F.log(sum_exp) + max_val

    return log_sum_exp.squeeze(axis=-1)
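# The final block is the standard log-sum-exp trick: subtracting the per-row
# maximum before exponentiating keeps the sum finite. A standalone NumPy
# sketch of the same computation (ours), checked against scipy:
import numpy as np
from scipy.special import logsumexp

summands_np = np.array([[-1000.0, -1001.0], [2.0, 3.0]])
max_val_np = summands_np.max(axis=-1, keepdims=True)
stable = (
    np.log(np.exp(summands_np - max_val_np).sum(axis=-1, keepdims=True))
    + max_val_np
)
assert np.allclose(stable.squeeze(-1), logsumexp(summands_np, axis=-1))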
def hybrid_forward(
    self,
    F,
    feat_static_cat: Tensor,  # (batch_size, 1)
    past_time_feat: Tensor,  # (batch_size, history_length, num_features)
    past_target: Tensor,  # (batch_size, history_length)
) -> Tensor:
    """
    Parameters
    ----------
    F
        Function space.
    feat_static_cat
        Shape: (batch_size, 1).
    past_time_feat
        Shape: (batch_size, history_length, num_features).
    past_target
        Shape: (batch_size, history_length).

    Returns
    -------
    Tensor
        A batch of negative log likelihoods.
    """
    fixed_effect, random_effect = self.compute_global_local(
        F, feat_static_cat, past_time_feat
    )
    loss = self.negative_normal_likelihood(
        F, past_target.expand_dims(axis=2), fixed_effect, random_effect
    )
    return loss
def get_issm_coeff(
    self, seasonal_indicators: Tensor  # (batch_size, time_length)
) -> Tuple[Tensor, Tensor, Tensor]:
    F = getF(seasonal_indicators)
    emission_coeff_ls, transition_coeff_ls, innovation_coeff_ls = zip(
        self.nonseasonal_issm.get_issm_coeff(seasonal_indicators),
        *[
            issm.get_issm_coeff(
                seasonal_indicators.slice_axis(
                    axis=-1, begin=ix, end=ix + 1
                )
            )
            for ix, issm in enumerate(self.seasonal_issms)
        ],
    )

    # stack emission and innovation coefficients
    emission_coeff = F.concat(*emission_coeff_ls, dim=-1)
    innovation_coeff = F.concat(*innovation_coeff_ls, dim=-1)

    # transition coefficient is block diagonal!
    transition_coeff = _make_block_diagonal(transition_coeff_ls)

    return emission_coeff, transition_coeff, innovation_coeff
def _expand_param(p: Tensor, num_samples: Optional[int] = None) -> Tensor:
    """
    Expand parameters by num_samples along the first dimension.
    """
    if num_samples is None:
        return p
    return p.expand_dims(axis=0).repeat(axis=0, repeats=num_samples)
def capacitance_tril(F, rank: Tensor, W: Tensor, D: Tensor) -> Tensor:
    r"""
    Parameters
    ----------
    F
    rank
    W : (..., dim, rank)
    D : (..., dim)

    Returns
    -------
        the lower Cholesky factor of the capacitance matrix
        :math:`I + W^T D^{-1} W`
    """
    # (..., dim, rank)
    Wt_D_inv_t = F.broadcast_div(W, D.expand_dims(axis=-1))

    # (..., rank, rank)
    K = F.linalg_gemm2(Wt_D_inv_t, W, transpose_a=True)

    # (..., rank, rank)
    Id = F.broadcast_mul(F.ones_like(K), F.eye(rank))

    # (..., rank, rank)
    return F.linalg.potrf(K + Id)
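# Why the Cholesky factor of the capacitance matrix is useful (illustration
# only, dense NumPy, not part of the library): by the matrix determinant
# lemma, det(W W^T + diag(D)) = det(C) * prod(D), so the log-determinant of
# the full covariance can be read off this factor.
import numpy as np

rng = np.random.default_rng(0)
dim_np, rank_np = 5, 2
D_np = rng.uniform(0.5, 2.0, size=dim_np)
W_np = rng.normal(size=(dim_np, rank_np))

C_np = np.eye(rank_np) + W_np.T @ np.diag(1.0 / D_np) @ W_np
L_np = np.linalg.cholesky(C_np)
lhs = np.linalg.det(W_np @ W_np.T + np.diag(D_np))
rhs = np.prod(np.diag(L_np)) ** 2 * np.prod(D_np)   # det(C) * prod(D)
assert np.isclose(lhs, rhs)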
def log_prob(self, x: Tensor) -> Tensor:
    F = self.F
    alpha, beta = self.alpha, self.beta

    def gamma_log_prob(x, alpha, beta):
        return (
            alpha * F.log(beta)
            - F.gammaln(alpha)
            + (alpha - 1) * F.log(x)
            - beta * x
        )

    # gamma_log_prob(x) above returns NaN for x <= 0. Wherever there is a NaN
    # in either of the F.where() branches, F.where() returns NaN at that entry
    # as well, due to its indicator-function multiplication:
    # 1 * f(x) + np.nan * 0 = nan, since np.nan * 0 returns nan.
    # Therefore, replacing gamma_log_prob(x) with gamma_log_prob(abs(x))
    # mitigates NaN returns in cases of x <= 0 without altering the value in
    # cases of x > 0. This is a known issue in PyTorch as well:
    # https://github.com/pytorch/pytorch/issues/12986.

    # mask zeros to prevent NaN gradients for x == 0
    x_masked = F.where(x == 0, x.ones_like() * 0.5, x)

    return F.where(
        x > 0,
        gamma_log_prob(F.abs(x_masked), alpha, beta),
        -(10.0**15) * F.ones_like(x),
    )
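# Cross-check (ours, not part of the module): gamma_log_prob uses the standard
# shape/rate log-density; scipy parametrizes the Gamma distribution by scale
# (= 1 / rate), so the comparison looks as follows.
import numpy as np
from scipy.special import gammaln
from scipy.stats import gamma

alpha_np, beta_np = 2.5, 1.3    # shape and rate
x_np = np.array([0.1, 1.0, 4.2])
log_prob_np = (
    alpha_np * np.log(beta_np)
    - gammaln(alpha_np)
    + (alpha_np - 1) * np.log(x_np)
    - beta_np * x_np
)
assert np.allclose(log_prob_np, gamma.logpdf(x_np, a=alpha_np, scale=1.0 / beta_np))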
def weighted_average(
    F, x: Tensor, weights: Optional[Tensor] = None, axis: Optional[int] = None
) -> Tensor:
    """
    Computes the weighted average of a given tensor across a given axis,
    masking values associated with weight zero, meaning instead of
    `nan * 0 = nan` you will get `0 * 0 = 0`.

    Parameters
    ----------
    F
        The function space to use.
    x
        Input tensor, of which the average must be computed.
    weights
        Weights tensor, of the same shape as `x`.
    axis
        The axis along which to average `x`.

    Returns
    -------
    Tensor
        The tensor with values averaged along the specified `axis`.
    """
    if weights is not None:
        weighted_tensor = F.where(
            condition=weights, x=x * weights, y=F.zeros_like(x)
        )
        sum_weights = F.maximum(1.0, weights.sum(axis=axis))
        return weighted_tensor.sum(axis=axis) / sum_weights
    else:
        return x.mean(axis=axis)
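# The point of masking before multiplying is that nan * 0 is still nan;
# the masked form simply drops those entries. A short NumPy illustration
# (ours, not part of the library):
import numpy as np

x_np = np.array([1.0, np.nan, 3.0])
w_np = np.array([1.0, 0.0, 1.0])
naive = (x_np * w_np).sum() / max(w_np.sum(), 1.0)                           # nan
masked = np.where(w_np != 0, x_np * w_np, 0.0).sum() / max(w_np.sum(), 1.0)  # 2.0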
def quantile_losses(
    self, obs: Tensor, quantiles: Tensor, levels: Tensor
) -> Tensor:
    """
    Computes quantile losses for all the quantiles specified.

    Parameters
    ----------
    obs
        Ground truth observation.
        Shape: `(batch_size, seq_len, *event_shape)`.
    quantiles
        Quantile values.
        Shape: `(batch_size, seq_len, *event_shape, num_quantiles)`.
    levels
        Quantile levels.
        Shape: `(batch_size, seq_len, *event_shape, num_quantiles)`.

    Returns
    -------
    Tensor
        Quantile losses of shape:
        `(batch_size, seq_len, *event_shape, num_quantiles)`.
    """
    obs = obs.expand_dims(axis=-1)
    assert obs.shape[:-1] == quantiles.shape[:-1]
    assert obs.shape[:-1] == levels.shape[:-1]
    assert obs.shape[-1] == 1

    return self.F.where(
        obs >= quantiles,
        levels * (obs - quantiles),
        (1 - levels) * (quantiles - obs),
    )
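# This is the standard pinball (quantile) loss: under-prediction is weighted
# by the level, over-prediction by one minus the level. A tiny NumPy example
# (ours) for a single observation and the 0.9-quantile:
import numpy as np

obs_np = 10.0
quantiles_np = np.array([8.0, 12.0])
levels_np = np.array([0.9, 0.9])
loss_np = np.where(
    obs_np >= quantiles_np,
    levels_np * (obs_np - quantiles_np),        # quantile too low: weight 0.9
    (1 - levels_np) * (quantiles_np - obs_np),  # quantile too high: weight 0.1
)
# loss_np == [1.8, 0.2]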
def _assemble_covariates(
    feat_dynamic_real: Tensor,
    feat_dynamic_cat: Tensor,
    feat_static_real: Tensor,
    feat_static_cat: Tensor,
    is_past: bool,
) -> Tensor:
    covariates = []
    if feat_dynamic_real.shape[-1] > 0:
        covariates.append(feat_dynamic_real)
    if feat_static_real.shape[-1] > 0:
        covariates.append(
            feat_static_real.expand_dims(axis=1).repeat(
                axis=1,
                repeats=self.context_length if is_past else self.prediction_length,
            )
        )
    if len(covariates) > 0:
        covariates = F.concat(*covariates, dim=-1)
        covariates = self.covar_proj(covariates)
    else:
        covariates = None

    categories = []
    if feat_dynamic_cat.shape[-1] > 0:
        categories.append(feat_dynamic_cat)
    if feat_static_cat.shape[-1] > 0:
        categories.append(
            feat_static_cat.expand_dims(axis=1).repeat(
                axis=1,
                repeats=self.context_length if is_past else self.prediction_length,
            )
        )
    if len(categories) > 0:
        categories = F.concat(*categories, dim=-1)
        embeddings = self.embedder(categories)
        embeddings = F.reshape(
            embeddings, shape=(0, 0, -4, self.d_hidden, -1)
        ).sum(axis=-1)
        if covariates is not None:
            covariates = covariates + embeddings
        else:
            covariates = embeddings

    return covariates
def quantile(self, level: Tensor) -> Tensor:
    F = self.F
    # self.bin_probs.shape = (batch_shape, num_bins)
    probs = self.bin_probs.transpose()  # (num_bins, batch_shape.T)

    # (batch_shape)
    zeros_batch_size = F.zeros_like(
        F.slice_axis(self.bin_probs, axis=-1, begin=0, end=1).squeeze(
            axis=-1
        )
    )

    level = level.expand_dims(axis=0)

    # cdf shape (batch_size.T, levels)
    zeros_cdf = F.broadcast_add(
        zeros_batch_size.transpose().expand_dims(axis=-1),
        level.zeros_like(),
    )
    start_state = (zeros_cdf, zeros_cdf.astype("int32"))

    def step(p, state):
        cdf, idx = state
        cdf = F.broadcast_add(cdf, p.expand_dims(axis=-1))
        idx = F.where(F.broadcast_greater(cdf, level), idx, idx + 1)
        return zeros_batch_size, (cdf, idx)

    _, states = F.contrib.foreach(step, probs, start_state)
    _, idx = states

    # idx.shape = (batch.T, levels)
    # centers.shape = (batch, num_bins)
    #
    # expand centers to shape -> (levels, batch, num_bins)
    # so we can use pick with idx.T.shape = (levels, batch)
    #
    # zeros_cdf.shape (batch.T, levels)
    centers_expanded = F.broadcast_add(
        self.bin_centers.transpose().expand_dims(axis=-1),
        zeros_cdf.expand_dims(axis=0),
    ).transpose()
    # centers_expanded.shape = (levels, batch, num_bins)

    # idx.shape (batch.T, levels)
    a = centers_expanded.pick(idx.transpose(), axis=-1)
    return a
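# Because the Symbol API has no data-dependent indexing, the code above scans
# the bins with F.contrib.foreach. The underlying computation is simply "pick
# the center of the first bin whose cumulative probability exceeds the level";
# an eager NumPy equivalent (ours, for a single distribution) is:
import numpy as np

bin_centers_np = np.array([0.0, 1.0, 2.0, 3.0])
bin_probs_np = np.array([0.1, 0.2, 0.3, 0.4])
level_np = np.array([0.05, 0.25, 0.9])

cdf_np = np.cumsum(bin_probs_np)
idx_np = np.searchsorted(cdf_np, level_np, side="right")
idx_np = np.minimum(idx_np, len(bin_centers_np) - 1)  # guard against round-off near level 1
# bin_centers_np[idx_np] == [0.0, 1.0, 3.0]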
def quantile_internal(
    self, x: Tensor, axis: Optional[int] = None
) -> Tensor:
    r"""
    Evaluates the quantile function at the quantile levels contained in `x`.

    Parameters
    ----------
    x
        Tensor of shape ``*gamma.shape`` if axis=None, or containing an
        additional axis on the specified position, otherwise.
    axis
        Index of the axis containing the different quantile levels which
        are to be computed.

    Returns
    -------
    Tensor
        Quantiles tensor, of the same shape as x.
    """
    F = self.F

    # shapes of self
    # self.gamma: (*batch_shape)
    # self.knot_positions, self.b: (*batch_shape, num_pieces)

    # axis=None - passed at inference when num_samples is None
    # The shape of x is (*batch_shape).
    # The shapes of the parameters should be:
    # gamma: (*batch_shape), knot_positions, b: (*batch_shape, num_pieces)
    # They match the self. counterparts so no reshaping is needed

    # axis=0 - passed at inference when num_samples is not None
    # The shape of x is (num_samples, *batch_shape).
    # The shapes of the parameters should be:
    # gamma: (num_samples, *batch_shape),
    # knot_positions, b: (num_samples, *batch_shape, num_pieces)
    # They do not match the self. counterparts and we need to expand
    # axis=0 to all of them.

    # axis=-2 - passed at training when we evaluate quantiles at
    # knot_positions in order to compute a_tilde
    # The shape of x is shape(x) = shape(knot_positions)
    #                            = (*batch_shape, num_pieces).
    # The shapes of the parameters should be:
    # gamma: (*batch_shape, 1),
    # knot_positions, b: (*batch_shape, 1, num_pieces)
    # They do not match the self. counterparts and we need to expand
    # axis=-1 for gamma and axis=-2 for the rest.

    if axis is not None:
        gamma = self.gamma.expand_dims(axis=axis if axis == 0 else -1)
        knot_positions = self.knot_positions.expand_dims(axis=axis)
        b = self.b.expand_dims(axis=axis)
    else:
        gamma, knot_positions, b = self.gamma, self.knot_positions, self.b

    x_minus_knots = F.broadcast_minus(
        x.expand_dims(axis=-1), knot_positions
    )

    quantile = F.broadcast_add(
        gamma, F.sum(F.broadcast_mul(b, F.relu(x_minus_knots)), axis=-1)
    )

    return quantile
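# Shape bookkeeping aside, the quantile function being evaluated is
#   q(x) = gamma + sum_i b_i * max(x - knot_i, 0),
# i.e. a monotone, piecewise-linear function of the quantile level.
# A scalar NumPy sketch with made-up parameters (ours):
import numpy as np

gamma_np = 1.0                                    # value of q at level 0
knot_positions_np = np.array([0.0, 0.4, 0.8])     # levels where the slope changes
b_np = np.array([2.0, 1.0, 3.0])                  # slope increments per piece

def quantile_np(x: float) -> float:
    return gamma_np + float((b_np * np.maximum(x - knot_positions_np, 0.0)).sum())

# quantile_np(0.2) == 1.0 + 2.0 * 0.2                          == 1.4
# quantile_np(0.9) == 1.0 + 2.0 * 0.9 + 1.0 * 0.5 + 3.0 * 0.1  == 3.6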
def s(mu: Tensor, b: Tensor) -> Tensor:
    ones = mu.ones_like()
    x = F.random.uniform(-0.5 * ones, 0.5 * ones, dtype=dtype)
    laplace_samples = mu - b * F.sign(x) * F.log(
        # clip 1.0 - 2.0 * F.abs(x) away from zero to avoid log(0)
        (1.0 - 2.0 * F.abs(x)).clip(1.0e-30, 1.0e30)
    )
    return laplace_samples
def mahalanobis_distance(
    F, W: Tensor, D: Tensor, capacitance_tril: Tensor, x: Tensor
) -> Tensor:
    r"""
    Uses the Woodbury matrix identity

    .. math::
        (W W^T + D)^{-1} = D^{-1} - D^{-1} W C^{-1} W^T D^{-1},

    where :math:`C` is the capacitance matrix :math:`I + W^T D^{-1} W`, to
    compute the squared Mahalanobis distance :math:`x^T (W W^T + D)^{-1} x`.

    Parameters
    ----------
    F
    W
        (..., dim, rank)
    D
        (..., dim)
    capacitance_tril
        (..., rank, rank)
    x
        (..., dim)

    Returns
    -------
    Tensor
        Squared Mahalanobis distance :math:`x^T (W W^T + D)^{-1} x`.
    """
    xx = x.expand_dims(axis=-1)

    # (..., rank, 1)
    Wt_Dinv_x = F.linalg_gemm2(
        F.broadcast_div(W, D.expand_dims(axis=-1)), xx, transpose_a=True
    )

    # compute x^T D^-1 x, (...,)
    mahalanobis_D_inv = F.broadcast_div(x.square(), D).sum(axis=-1)

    # (..., rank)
    L_inv_Wt_Dinv_x = F.linalg_trsm(capacitance_tril, Wt_Dinv_x).squeeze(
        axis=-1
    )
    mahalanobis_L = L_inv_Wt_Dinv_x.square().sum(axis=-1).squeeze()

    return F.broadcast_minus(mahalanobis_D_inv, mahalanobis_L)
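# Dense NumPy verification of the identity used above (illustration only, not
# part of the library): the two terms are x^T D^{-1} x and the correction
# through the capacitance matrix, and their difference equals the full
# Mahalanobis distance.
import numpy as np

rng = np.random.default_rng(0)
dim_np, rank_np = 5, 2
D_np = rng.uniform(0.5, 2.0, size=dim_np)
W_np = rng.normal(size=(dim_np, rank_np))
x_np = rng.normal(size=dim_np)

D_inv = np.diag(1.0 / D_np)
C_np = np.eye(rank_np) + W_np.T @ D_inv @ W_np
lhs = x_np @ np.linalg.solve(W_np @ W_np.T + np.diag(D_np), x_np)
rhs = x_np @ (D_inv - D_inv @ W_np @ np.linalg.solve(C_np, W_np.T @ D_inv)) @ x_np
assert np.isclose(lhs, rhs)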
def __init__(
    self, alpha: Tensor, beta: Tensor, one_probability: Tensor
) -> None:
    super().__init__(
        alpha=alpha,
        beta=beta,
        zero_probability=alpha.zeros_like(),
        one_probability=one_probability,
    )
def get_gp_params(
    self,
    F,
    past_target: Tensor,
    past_time_feat: Tensor,
    feat_static_cat: Tensor,
) -> Tuple:
    """
    This function returns the GP hyper-parameters for the model.

    Parameters
    ----------
    F
        A module that can either refer to the Symbol API or the NDArray
        API in MXNet.
    past_target
        Training time series values of shape (batch_size, context_length).
    past_time_feat
        Training features of shape (batch_size, context_length, num_features).
    feat_static_cat
        Time series indices of shape (batch_size, 1).

    Returns
    -------
    Tuple
        Tuple of kernel hyper-parameters of length num_hyperparams.
        Each is a Tensor of shape (batch_size, 1, 1).
        Model noise sigma. Tensor of shape (batch_size, 1, 1).
    """
    output = self.embedding(
        feat_static_cat.squeeze()
    )  # Shape (batch_size, num_hyperparams + 1)
    kernel_args = self.proj_kernel_args(output)
    sigma = softplus(
        F,
        output.slice_axis(  # sigma is the last hyper-parameter
            axis=1,
            begin=self.num_hyperparams,
            end=self.num_hyperparams + 1,
        ),
    )
    if self.params_scaling:
        scalings = self.kernel_output.gp_params_scaling(
            F, past_target, past_time_feat
        )
        sigma = F.broadcast_mul(sigma, scalings[self.num_hyperparams])
        kernel_args = (
            F.broadcast_mul(kernel_arg, scaling)
            for kernel_arg, scaling in zip(
                kernel_args, scalings[0:self.num_hyperparams]
            )
        )
    min_value = 1e-5
    max_value = 1e8
    kernel_args = (
        kernel_arg.clip(min_value, max_value).expand_dims(axis=2)
        for kernel_arg in kernel_args
    )
    sigma = sigma.clip(min_value, max_value).expand_dims(axis=2)
    return kernel_args, sigma
def quantile(self, level: Tensor):
    F = self.F
    # we consider level to be an independent axis and so expand it
    # to shape (num_levels, 1, 1, ...)
    for _ in range(self.all_dim):
        level = level.expand_dims(axis=-1)

    x_shifted = F.broadcast_div(F.power(1 - level, -self.xi) - 1, self.xi)
    x = F.broadcast_mul(x_shifted, self.beta)
    return x
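# The expression above is the generalized Pareto inverse CDF
#   q(p) = beta * ((1 - p)^(-xi) - 1) / xi   (for xi != 0),
# which can be cross-checked against scipy (ours, not part of the library);
# scipy calls the shape parameter `c`.
import numpy as np
from scipy.stats import genpareto

xi_np, beta_np = 0.3, 2.0
level_np = np.array([0.1, 0.5, 0.95])
q_np = beta_np * ((1.0 - level_np) ** (-xi_np) - 1.0) / xi_np
assert np.allclose(q_np, genpareto.ppf(level_np, c=xi_np, scale=beta_np))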
def compute_scale(
    self,
    F,
    data: Tensor,
    observed_indicator: Tensor,  # shapes (N, T, C) or (N, C, T)
) -> Tensor:
    """
    Parameters
    ----------
    F
        A module that can either refer to the Symbol API or the NDArray
        API in MXNet.
    data
        Tensor containing the data to be scaled.
    observed_indicator
        Binary tensor with the same shape as ``data``, that has 1 in
        correspondence of observed data points, and 0 in correspondence of
        missing data points.

    Returns
    -------
    Tensor
        Tensor of shape (N, C), computed according to the average absolute
        value over time of the observed values.
    """
    # these will have shape (N, C)
    num_observed = F.sum(observed_indicator, axis=self.axis)
    sum_observed = (data.abs() * observed_indicator).sum(axis=self.axis)

    # first compute a global scale per-dimension
    total_observed = num_observed.sum(axis=0)
    denominator = F.maximum(total_observed, 1.0)

    if self.default_scale is not None:
        default_scale = self.default_scale * F.ones_like(num_observed)
    else:
        # shape (C, )
        default_scale = sum_observed.sum(axis=0) / denominator

    # then compute a per-item, per-dimension scale
    denominator = F.maximum(num_observed, 1.0)
    scale = sum_observed / denominator  # shape (N, C)

    # use per-batch scale when no element is observed
    # or when the sequence contains only zeros
    cond = F.broadcast_greater(sum_observed, F.zeros_like(sum_observed))
    scale = F.where(
        cond,
        scale,
        F.broadcast_mul(default_scale, F.ones_like(num_observed)),
    )

    return F.maximum(scale, self.minimum_scale)
def log_intensity(self, x: Tensor) -> Tensor:
    r"""
    Logarithm of the intensity (a.k.a. hazard) function.

    The intensity is defined as :math:`\lambda(x) = p(x) / S(x)`.
    The intensity of the Weibull distribution is
    :math:`\lambda(x) = b * k * x^{k - 1}`.
    """
    log_x = x.clip(1e-10, np.inf).log()
    return self.rate.log() + self.shape.log() + (self.shape - 1) * log_x
def log_survival(self, x: Tensor) -> Tensor:
    r"""
    Logarithm of the survival function
    :math:`\log S(x) = \log(1 - CDF(x))`.

    We define :math:`z = (\log(x) - \mu) / \sigma` and obtain the survival
    function as :math:`S(x) = sigmoid(-z)`, or equivalently
    :math:`\log S(x) = -\log(1 + \exp(z))`.
    """
    log_x = x.clip(1e-20, np.inf).log()
    z = (log_x - self.mu) / self.sigma
    F = getF(x)
    return -F.Activation(z, "softrelu")
def quantile(self, level: Tensor) -> Tensor:
    F = self.F
    # we consider level to be an independent axis and so expand it
    # to shape (num_levels, 1, 1, ...)
    for _ in range(self.all_dim):
        level = level.expand_dims(axis=-1)

    return F.broadcast_add(
        self.mu,
        F.broadcast_mul(
            self.sigma, math.sqrt(2.0) * F.erfinv(2.0 * level - 1.0)
        ),
    )
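# The expression mu + sigma * sqrt(2) * erfinv(2p - 1) is the Gaussian inverse
# CDF written through the inverse error function; a short NumPy/SciPy check
# (ours, not part of the library):
import numpy as np
from scipy.special import erfinv
from scipy.stats import norm

mu_np, sigma_np = 0.5, 2.0
level_np = np.array([0.1, 0.5, 0.9])
q_np = mu_np + sigma_np * np.sqrt(2.0) * erfinv(2.0 * level_np - 1.0)
assert np.allclose(q_np, norm.ppf(level_np, loc=mu_np, scale=sigma_np))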